Fix financials ; Remove broken decryption & scraping

pull/1568/head
ValueRaider 2023-06-21 14:49:16 +01:00
parent cd4816e289
commit 1ce9ce2784
11 changed files with 267 additions and 961 deletions

View File

@ -42,11 +42,6 @@ Yahoo! finance API is intended for personal use only.**
---
## News [2023-01-27]
Since December 2022 Yahoo has been encrypting the web data that `yfinance` scrapes for non-market data. Fortunately the decryption keys are available, although Yahoo has moved/changed them several times, which is why `yfinance` has broken several times. `yfinance` is now better prepared for any future changes by Yahoo.
Why is Yahoo doing this? We don't know. Is it to stop scrapers? Maybe, so we've implemented changes to reduce load on Yahoo. In December we rolled out version 0.2 with optimised scraping. ~Then in 0.2.6 introduced `Ticker.fast_info`, providing much faster access to some `info` elements wherever possible e.g. price stats and forcing users to switch (sorry but we think necessary). `info` will continue to exist for as long as there are elements without a fast alternative.~ `info` now fixed and much faster than before.
## Quick Start
### The Ticker module
@ -74,9 +69,6 @@ msft.splits
msft.capital_gains # only for mutual funds & etfs
# show share count
# - yearly summary:
msft.shares
# - accurate time-series count:
msft.get_shares_full(start="2022-01-01", end=None)
# show financials:
@ -96,25 +88,6 @@ msft.major_holders
msft.institutional_holders
msft.mutualfund_holders
# show earnings
msft.earnings
msft.quarterly_earnings
# show sustainability
msft.sustainability
# show analysts recommendations
msft.recommendations
msft.recommendations_summary
# show analysts other work
msft.analyst_price_target
msft.revenue_forecasts
msft.earnings_forecasts
msft.earnings_trend
# show next event (earnings, etc)
msft.calendar
# Show future and historic earnings dates. Returns at most the next 4 quarters and the last 8 quarters by default.
# Note: If more are needed, use msft.get_earnings_dates(limit=XX) with an increased limit argument.
msft.earnings_dates

View File

@ -63,9 +63,8 @@ setup(
'requests>=2.26', 'multitasking>=0.0.7',
'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
'frozendict>=2.3.4',
# 'pycryptodome>=3.6.6',
'cryptography>=3.3.2',
'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
# Note: pandas.read_html() needs html5lib & beautifulsoup4
entry_points={
'console_scripts': [
'sample=sample:main',

View File

@ -71,19 +71,20 @@ class TestTicker(unittest.TestCase):
dat.news
dat.earnings_dates
# These require decryption which is broken:
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
# These haven't been ported to Yahoo API
# dat.shares
# dat.info
# dat.calendar
# dat.recommendations
# dat.earnings
# dat.quarterly_earnings
# dat.income_stmt
# dat.quarterly_income_stmt
# dat.balance_sheet
# dat.quarterly_balance_sheet
# dat.cashflow
# dat.quarterly_cashflow
# dat.recommendations_summary
# dat.analyst_price_target
# dat.revenue_forecasts
@ -122,6 +123,13 @@ class TestTicker(unittest.TestCase):
dat.news
dat.earnings_dates
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
# These require decryption which is broken:
# dat.shares
# dat.info
@ -129,12 +137,6 @@ class TestTicker(unittest.TestCase):
# dat.recommendations
# dat.earnings
# dat.quarterly_earnings
# dat.income_stmt
# dat.quarterly_income_stmt
# dat.balance_sheet
# dat.quarterly_balance_sheet
# dat.cashflow
# dat.quarterly_cashflow
# dat.recommendations_summary
# dat.analyst_price_target
# dat.revenue_forecasts
@ -211,7 +213,7 @@ class TestTickerHistory(unittest.TestCase):
self.assertFalse(data.empty, "data is empty")
# Below will fail because decryption broken
# Below will fail because not ported to Yahoo API
# class TestTickerEarnings(unittest.TestCase):
# session = None
@ -367,270 +369,243 @@ class TestTickerMiscFinancials(unittest.TestCase):
self.assertIsInstance(data, pd.Series, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
# Below will fail because decryption broken
def test_income_statement(self):
expected_keys = ["Total Revenue", "Basic EPS"]
expected_periods_days = 365
# def test_income_statement(self):
# expected_keys = ["Total Revenue", "Basic EPS"]
# expected_periods_days = 365
# Test contents of table
data = self.ticker.get_income_stmt(pretty=True)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
# # Test contents of table
# data = self.ticker.get_income_stmt(pretty=True)
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# period = abs((data.columns[0]-data.columns[1]).days)
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
# Test property defaults
data2 = self.ticker.income_stmt
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# # Test property defaults
# data2 = self.ticker.income_stmt
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_income_stmt(pretty=False)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# # Test pretty=False
# expected_keys = [k.replace(' ', '') for k in expected_keys]
# data = self.ticker.get_income_stmt(pretty=False)
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# Test to_dict
data = self.ticker.get_income_stmt(as_dict=True)
self.assertIsInstance(data, dict, "data has wrong type")
# # Test to_dict
# data = self.ticker.get_income_stmt(as_dict=True)
# self.assertIsInstance(data, dict, "data has wrong type")
def test_quarterly_income_statement(self):
expected_keys = ["Total Revenue", "Basic EPS"]
expected_periods_days = 365//4
# def test_quarterly_income_statement(self):
# expected_keys = ["Total Revenue", "Basic EPS"]
# expected_periods_days = 365//4
# Test contents of table
data = self.ticker.get_income_stmt(pretty=True, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
# # Test contents of table
# data = self.ticker.get_income_stmt(pretty=True, freq="quarterly")
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# period = abs((data.columns[0]-data.columns[1]).days)
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
# Test property defaults
data2 = self.ticker.quarterly_income_stmt
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# # Test property defaults
# data2 = self.ticker.quarterly_income_stmt
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_income_stmt(pretty=False, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# # Test pretty=False
# expected_keys = [k.replace(' ', '') for k in expected_keys]
# data = self.ticker.get_income_stmt(pretty=False, freq="quarterly")
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# Test to_dict
data = self.ticker.get_income_stmt(as_dict=True)
self.assertIsInstance(data, dict, "data has wrong type")
# # Test to_dict
# data = self.ticker.get_income_stmt(as_dict=True)
# self.assertIsInstance(data, dict, "data has wrong type")
def test_balance_sheet(self):
expected_keys = ["Total Assets", "Net PPE"]
expected_periods_days = 365
# def test_quarterly_income_statement_old_fmt(self):
# expected_row = "TotalRevenue"
# data = self.ticker_old_fmt.get_income_stmt(freq="quarterly", legacy=True)
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# self.assertIn(expected_row, data.index, "Did not find expected row in index")
# Test contents of table
data = self.ticker.get_balance_sheet(pretty=True)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
# data_cached = self.ticker_old_fmt.get_income_stmt(freq="quarterly", legacy=True)
# self.assertIs(data, data_cached, "data not cached")
# Test property defaults
data2 = self.ticker.balance_sheet
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# def test_balance_sheet(self):
# expected_keys = ["Total Assets", "Net PPE"]
# expected_periods_days = 365
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_balance_sheet(pretty=False)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# # Test contents of table
# data = self.ticker.get_balance_sheet(pretty=True)
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# period = abs((data.columns[0]-data.columns[1]).days)
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
# Test to_dict
data = self.ticker.get_balance_sheet(as_dict=True)
self.assertIsInstance(data, dict, "data has wrong type")
# # Test property defaults
# data2 = self.ticker.balance_sheet
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
def test_quarterly_balance_sheet(self):
expected_keys = ["Total Assets", "Net PPE"]
expected_periods_days = 365//4
# # Test pretty=False
# expected_keys = [k.replace(' ', '') for k in expected_keys]
# data = self.ticker.get_balance_sheet(pretty=False)
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# Test contents of table
data = self.ticker.get_balance_sheet(pretty=True, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
# # Test to_dict
# data = self.ticker.get_balance_sheet(as_dict=True)
# self.assertIsInstance(data, dict, "data has wrong type")
# Test property defaults
data2 = self.ticker.quarterly_balance_sheet
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# def test_quarterly_balance_sheet(self):
# expected_keys = ["Total Assets", "Net PPE"]
# expected_periods_days = 365//4
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_balance_sheet(pretty=False, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# # Test contents of table
# data = self.ticker.get_balance_sheet(pretty=True, freq="quarterly")
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# period = abs((data.columns[0]-data.columns[1]).days)
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
# Test to_dict
data = self.ticker.get_balance_sheet(as_dict=True, freq="quarterly")
self.assertIsInstance(data, dict, "data has wrong type")
# # Test property defaults
# data2 = self.ticker.quarterly_balance_sheet
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
def test_cash_flow(self):
expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
expected_periods_days = 365
# # Test pretty=False
# expected_keys = [k.replace(' ', '') for k in expected_keys]
# data = self.ticker.get_balance_sheet(pretty=False, freq="quarterly")
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# Test contents of table
data = self.ticker.get_cashflow(pretty=True)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
# # Test to_dict
# data = self.ticker.get_balance_sheet(as_dict=True, freq="quarterly")
# self.assertIsInstance(data, dict, "data has wrong type")
# Test property defaults
data2 = self.ticker.cashflow
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# def test_quarterly_balance_sheet_old_fmt(self):
# expected_row = "TotalAssets"
# data = self.ticker_old_fmt.get_balance_sheet(freq="quarterly", legacy=True)
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# self.assertIn(expected_row, data.index, "Did not find expected row in index")
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_cashflow(pretty=False)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# data_cached = self.ticker_old_fmt.get_balance_sheet(freq="quarterly", legacy=True)
# self.assertIs(data, data_cached, "data not cached")
# Test to_dict
data = self.ticker.get_cashflow(as_dict=True)
self.assertIsInstance(data, dict, "data has wrong type")
# def test_cash_flow(self):
# expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
# expected_periods_days = 365
def test_quarterly_cash_flow(self):
expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
expected_periods_days = 365//4
# # Test contents of table
# data = self.ticker.get_cashflow(pretty=True)
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# period = abs((data.columns[0]-data.columns[1]).days)
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
# Test contents of table
data = self.ticker.get_cashflow(pretty=True, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
# # Test property defaults
# data2 = self.ticker.cashflow
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# Test property defaults
data2 = self.ticker.quarterly_cashflow
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# # Test pretty=False
# expected_keys = [k.replace(' ', '') for k in expected_keys]
# data = self.ticker.get_cashflow(pretty=False)
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_cashflow(pretty=False, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# # Test to_dict
# data = self.ticker.get_cashflow(as_dict=True)
# self.assertIsInstance(data, dict, "data has wrong type")
# Test to_dict
data = self.ticker.get_cashflow(as_dict=True)
self.assertIsInstance(data, dict, "data has wrong type")
# def test_quarterly_cash_flow(self):
# expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
# expected_periods_days = 365//4
def test_income_alt_names(self):
i1 = self.ticker.income_stmt
i2 = self.ticker.incomestmt
self.assertTrue(i1.equals(i2))
i3 = self.ticker.financials
self.assertTrue(i1.equals(i3))
# # Test contents of table
# data = self.ticker.get_cashflow(pretty=True, freq="quarterly")
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
# period = abs((data.columns[0]-data.columns[1]).days)
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
i1 = self.ticker.get_income_stmt()
i2 = self.ticker.get_incomestmt()
self.assertTrue(i1.equals(i2))
i3 = self.ticker.get_financials()
self.assertTrue(i1.equals(i3))
# # Test property defaults
# data2 = self.ticker.quarterly_cashflow
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
i1 = self.ticker.quarterly_income_stmt
i2 = self.ticker.quarterly_incomestmt
self.assertTrue(i1.equals(i2))
i3 = self.ticker.quarterly_financials
self.assertTrue(i1.equals(i3))
# # Test pretty=False
# expected_keys = [k.replace(' ', '') for k in expected_keys]
# data = self.ticker.get_cashflow(pretty=False, freq="quarterly")
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# for k in expected_keys:
# self.assertIn(k, data.index, "Did not find expected row in index")
i1 = self.ticker.get_income_stmt(freq="quarterly")
i2 = self.ticker.get_incomestmt(freq="quarterly")
self.assertTrue(i1.equals(i2))
i3 = self.ticker.get_financials(freq="quarterly")
self.assertTrue(i1.equals(i3))
# # Test to_dict
# data = self.ticker.get_cashflow(as_dict=True)
# self.assertIsInstance(data, dict, "data has wrong type")
def test_balance_sheet_alt_names(self):
i1 = self.ticker.balance_sheet
i2 = self.ticker.balancesheet
self.assertTrue(i1.equals(i2))
# def test_quarterly_cashflow_old_fmt(self):
# expected_row = "NetIncome"
# data = self.ticker_old_fmt.get_cashflow(legacy=True, freq="quarterly")
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# self.assertIn(expected_row, data.index, "Did not find expected row in index")
i1 = self.ticker.get_balance_sheet()
i2 = self.ticker.get_balancesheet()
self.assertTrue(i1.equals(i2))
# data_cached = self.ticker_old_fmt.get_cashflow(legacy=True, freq="quarterly")
# self.assertIs(data, data_cached, "data not cached")
i1 = self.ticker.quarterly_balance_sheet
i2 = self.ticker.quarterly_balancesheet
self.assertTrue(i1.equals(i2))
# def test_income_alt_names(self):
# i1 = self.ticker.income_stmt
# i2 = self.ticker.incomestmt
# self.assertTrue(i1.equals(i2))
# i3 = self.ticker.financials
# self.assertTrue(i1.equals(i3))
i1 = self.ticker.get_balance_sheet(freq="quarterly")
i2 = self.ticker.get_balancesheet(freq="quarterly")
self.assertTrue(i1.equals(i2))
# i1 = self.ticker.get_income_stmt()
# i2 = self.ticker.get_incomestmt()
# self.assertTrue(i1.equals(i2))
# i3 = self.ticker.get_financials()
# self.assertTrue(i1.equals(i3))
def test_cash_flow_alt_names(self):
i1 = self.ticker.cash_flow
i2 = self.ticker.cashflow
self.assertTrue(i1.equals(i2))
# i1 = self.ticker.quarterly_income_stmt
# i2 = self.ticker.quarterly_incomestmt
# self.assertTrue(i1.equals(i2))
# i3 = self.ticker.quarterly_financials
# self.assertTrue(i1.equals(i3))
i1 = self.ticker.get_cash_flow()
i2 = self.ticker.get_cashflow()
self.assertTrue(i1.equals(i2))
# i1 = self.ticker.get_income_stmt(freq="quarterly")
# i2 = self.ticker.get_incomestmt(freq="quarterly")
# self.assertTrue(i1.equals(i2))
# i3 = self.ticker.get_financials(freq="quarterly")
# self.assertTrue(i1.equals(i3))
i1 = self.ticker.quarterly_cash_flow
i2 = self.ticker.quarterly_cashflow
self.assertTrue(i1.equals(i2))
# def test_balance_sheet_alt_names(self):
# i1 = self.ticker.balance_sheet
# i2 = self.ticker.balancesheet
# self.assertTrue(i1.equals(i2))
i1 = self.ticker.get_cash_flow(freq="quarterly")
i2 = self.ticker.get_cashflow(freq="quarterly")
self.assertTrue(i1.equals(i2))
# i1 = self.ticker.get_balance_sheet()
# i2 = self.ticker.get_balancesheet()
# self.assertTrue(i1.equals(i2))
def test_bad_freq_value_raises_exception(self):
self.assertRaises(ValueError, lambda: self.ticker.get_cashflow(freq="badarg"))
# i1 = self.ticker.quarterly_balance_sheet
# i2 = self.ticker.quarterly_balancesheet
# self.assertTrue(i1.equals(i2))
# i1 = self.ticker.get_balance_sheet(freq="quarterly")
# i2 = self.ticker.get_balancesheet(freq="quarterly")
# self.assertTrue(i1.equals(i2))
# def test_cash_flow_alt_names(self):
# i1 = self.ticker.cash_flow
# i2 = self.ticker.cashflow
# self.assertTrue(i1.equals(i2))
# i1 = self.ticker.get_cash_flow()
# i2 = self.ticker.get_cashflow()
# self.assertTrue(i1.equals(i2))
# i1 = self.ticker.quarterly_cash_flow
# i2 = self.ticker.quarterly_cashflow
# self.assertTrue(i1.equals(i2))
# i1 = self.ticker.get_cash_flow(freq="quarterly")
# i2 = self.ticker.get_cashflow(freq="quarterly")
# self.assertTrue(i1.equals(i2))
# Below will fail because not ported to Yahoo API
# def test_sustainability(self):
# data = self.ticker.sustainability
@ -685,9 +660,6 @@ class TestTickerMiscFinancials(unittest.TestCase):
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# def test_bad_freq_value_raises_exception(self):
# self.assertRaises(ValueError, lambda: self.ticker.get_cashflow(freq="badarg"))
class TestTickerInfo(unittest.TestCase):
session = None
@ -717,17 +689,13 @@ class TestTickerInfo(unittest.TestCase):
for k in f:
self.assertIsNotNone(f[k])
# Below will fail because decryption broken
# def test_info(self):
# data = self.tickers[0].info
# self.assertIsInstance(data, dict, "data has wrong type")
# self.assertIn("symbol", data.keys(), "Did not find expected key in info dict")
# self.assertEqual(self.symbols[0], data["symbol"], "Wrong symbol value in info dict")
def test_info(self):
data = self.tickers[0].info
self.assertIsInstance(data, dict, "data has wrong type")
self.assertIn("symbol", data.keys(), "Did not find expected key in info dict")
self.assertEqual(self.symbols[0], data["symbol"], "Wrong symbol value in info dict")
# def test_fast_info_matches_info(self):
# yf.scrapers.quote.PRUNE_INFO = False
# fast_info_keys = set()
# for ticker in self.tickers:
# fast_info_keys.update(set(ticker.fast_info.keys()))

View File

@ -47,7 +47,6 @@ import json as _json
import logging
_BASE_URL_ = 'https://query2.finance.yahoo.com'
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
_ROOT_URL_ = 'https://finance.yahoo.com'
class TickerBase:
@ -58,7 +57,6 @@ class TickerBase:
self._history_metadata = None
self._history_metadata_formatted = False
self._base_url = _BASE_URL_
self._scrape_url = _SCRAPE_URL_
self._tz = None
self._isin = None
@ -86,13 +84,6 @@ class TickerBase:
# Limit recursion depth when repairing prices
self._reconstruct_start_interval = None
def stats(self, proxy=None):
ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
# get info and sustainability
data = self._data.get_json_data_stores(proxy=proxy)["QuoteSummaryStore"]
return data
@utils.log_indent_decorator
def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False, actions=True,
@ -1592,7 +1583,7 @@ class TickerBase:
return dict_data
return data
def get_income_stmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
def get_income_stmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
"""
:Parameters:
as_dict: bool
@ -1604,19 +1595,13 @@ class TickerBase:
freq: str
"yearly" or "quarterly"
Default is "yearly"
legacy: bool
Return old financials tables. Useful for when new tables not available
Default is False
proxy: str
Optional. Proxy server URL scheme
Default is None
"""
self._fundamentals.proxy = proxy
if legacy:
data = self._fundamentals.financials.get_income_scrape(freq=freq, proxy=proxy)
else:
data = self._fundamentals.financials.get_income_time_series(freq=freq, proxy=proxy)
data = self._fundamentals.financials.get_income_time_series(freq=freq, proxy=proxy)
if pretty:
data = data.copy()
@ -1625,13 +1610,13 @@ class TickerBase:
return data.to_dict()
return data
def get_incomestmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
return self.get_income_stmt(proxy, as_dict, pretty, freq, legacy)
def get_incomestmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
return self.get_income_stmt(proxy, as_dict, pretty, freq)
def get_financials(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
return self.get_income_stmt(proxy, as_dict, pretty, freq, legacy)
def get_financials(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
return self.get_income_stmt(proxy, as_dict, pretty, freq)
def get_balance_sheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
def get_balance_sheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
"""
:Parameters:
as_dict: bool
@ -1643,19 +1628,13 @@ class TickerBase:
freq: str
"yearly" or "quarterly"
Default is "yearly"
legacy: bool
Return old financials tables. Useful for when new tables not available
Default is False
proxy: str
Optional. Proxy server URL scheme
Default is None
"""
self._fundamentals.proxy = proxy
if legacy:
data = self._fundamentals.financials.get_balance_sheet_scrape(freq=freq, proxy=proxy)
else:
data = self._fundamentals.financials.get_balance_sheet_time_series(freq=freq, proxy=proxy)
data = self._fundamentals.financials.get_balance_sheet_time_series(freq=freq, proxy=proxy)
if pretty:
data = data.copy()
@ -1664,10 +1643,10 @@ class TickerBase:
return data.to_dict()
return data
def get_balancesheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
return self.get_balance_sheet(proxy, as_dict, pretty, freq, legacy)
def get_balancesheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
return self.get_balance_sheet(proxy, as_dict, pretty, freq)
def get_cash_flow(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
def get_cash_flow(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
"""
:Parameters:
as_dict: bool
@ -1679,19 +1658,13 @@ class TickerBase:
freq: str
"yearly" or "quarterly"
Default is "yearly"
legacy: bool
Return old financials tables. Useful for when new tables not available
Default is False
proxy: str
Optional. Proxy server URL scheme
Default is None
"""
self._fundamentals.proxy = proxy
if legacy:
data = self._fundamentals.financials.get_cash_flow_scrape(freq=freq, proxy=proxy)
else:
data = self._fundamentals.financials.get_cash_flow_time_series(freq=freq, proxy=proxy)
data = self._fundamentals.financials.get_cash_flow_time_series(freq=freq, proxy=proxy)
if pretty:
data = data.copy()
@ -1700,8 +1673,8 @@ class TickerBase:
return data.to_dict()
return data
def get_cashflow(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
return self.get_cash_flow(proxy, as_dict, pretty, freq, legacy)
def get_cashflow(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
return self.get_cash_flow(proxy, as_dict, pretty, freq)
def get_dividends(self, proxy=None):
if self._history is None:

View File

@ -0,0 +1,8 @@
# Field-name lists for the fundamentals time-series, keyed by statement name
# ('financials', 'balance-sheet', 'cash-flow'). Each list is looked up by name
# and passed on as the set of keys to request.
fundamentals_keys = {}
fundamentals_keys['financials'] = ["TaxEffectOfUnusualItems","TaxRateForCalcs","NormalizedEBITDA","NormalizedDilutedEPS","NormalizedBasicEPS","TotalUnusualItems","TotalUnusualItemsExcludingGoodwill","NetIncomeFromContinuingOperationNetMinorityInterest","ReconciledDepreciation","ReconciledCostOfRevenue","EBITDA","EBIT","NetInterestIncome","InterestExpense","InterestIncome","ContinuingAndDiscontinuedDilutedEPS","ContinuingAndDiscontinuedBasicEPS","NormalizedIncome","NetIncomeFromContinuingAndDiscontinuedOperation","TotalExpenses","RentExpenseSupplemental","ReportedNormalizedDilutedEPS","ReportedNormalizedBasicEPS","TotalOperatingIncomeAsReported","DividendPerShare","DilutedAverageShares","BasicAverageShares","DilutedEPS","DilutedEPSOtherGainsLosses","TaxLossCarryforwardDilutedEPS","DilutedAccountingChange","DilutedExtraordinary","DilutedDiscontinuousOperations","DilutedContinuousOperations","BasicEPS","BasicEPSOtherGainsLosses","TaxLossCarryforwardBasicEPS","BasicAccountingChange","BasicExtraordinary","BasicDiscontinuousOperations","BasicContinuousOperations","DilutedNIAvailtoComStockholders","AverageDilutionEarnings","NetIncomeCommonStockholders","OtherunderPreferredStockDividend","PreferredStockDividends","NetIncome","MinorityInterests","NetIncomeIncludingNoncontrollingInterests","NetIncomeFromTaxLossCarryforward","NetIncomeExtraordinary","NetIncomeDiscontinuousOperations","NetIncomeContinuousOperations","EarningsFromEquityInterestNetOfTax","TaxProvision","PretaxIncome","OtherIncomeExpense","OtherNonOperatingIncomeExpenses","SpecialIncomeCharges","GainOnSaleOfPPE","GainOnSaleOfBusiness","OtherSpecialCharges","WriteOff","ImpairmentOfCapitalAssets","RestructuringAndMergernAcquisition","SecuritiesAmortization","EarningsFromEquityInterest","GainOnSaleOfSecurity","NetNonOperatingInterestIncomeExpense","TotalOtherFinanceCost","InterestExpenseNonOperating","InterestIncomeNonOperating","OperatingIncome","OperatingExpense","OtherOperatingExpenses","OtherTaxes","ProvisionForD
oubtfulAccounts","DepreciationAmortizationDepletionIncomeStatement","DepletionIncomeStatement","DepreciationAndAmortizationInIncomeStatement","Amortization","AmortizationOfIntangiblesIncomeStatement","DepreciationIncomeStatement","ResearchAndDevelopment","SellingGeneralAndAdministration","SellingAndMarketingExpense","GeneralAndAdministrativeExpense","OtherGandA","InsuranceAndClaims","RentAndLandingFees","SalariesAndWages","GrossProfit","CostOfRevenue","TotalRevenue","ExciseTaxes","OperatingRevenue"]
fundamentals_keys['balance-sheet'] = ["TreasurySharesNumber","PreferredSharesNumber","OrdinarySharesNumber","ShareIssued","NetDebt","TotalDebt","TangibleBookValue","InvestedCapital","WorkingCapital","NetTangibleAssets","CapitalLeaseObligations","CommonStockEquity","PreferredStockEquity","TotalCapitalization","TotalEquityGrossMinorityInterest","MinorityInterest","StockholdersEquity","OtherEquityInterest","GainsLossesNotAffectingRetainedEarnings","OtherEquityAdjustments","FixedAssetsRevaluationReserve","ForeignCurrencyTranslationAdjustments","MinimumPensionLiabilities","UnrealizedGainLoss","TreasuryStock","RetainedEarnings","AdditionalPaidInCapital","CapitalStock","OtherCapitalStock","CommonStock","PreferredStock","TotalPartnershipCapital","GeneralPartnershipCapital","LimitedPartnershipCapital","TotalLiabilitiesNetMinorityInterest","TotalNonCurrentLiabilitiesNetMinorityInterest","OtherNonCurrentLiabilities","LiabilitiesHeldforSaleNonCurrent","RestrictedCommonStock","PreferredSecuritiesOutsideStockEquity","DerivativeProductLiabilities","EmployeeBenefits","NonCurrentPensionAndOtherPostretirementBenefitPlans","NonCurrentAccruedExpenses","DuetoRelatedPartiesNonCurrent","TradeandOtherPayablesNonCurrent","NonCurrentDeferredLiabilities","NonCurrentDeferredRevenue","NonCurrentDeferredTaxesLiabilities","LongTermDebtAndCapitalLeaseObligation","LongTermCapitalLeaseObligation","LongTermDebt","LongTermProvisions","CurrentLiabilities","OtherCurrentLiabilities","CurrentDeferredLiabilities","CurrentDeferredRevenue","CurrentDeferredTaxesLiabilities","CurrentDebtAndCapitalLeaseObligation","CurrentCapitalLeaseObligation","CurrentDebt","OtherCurrentBorrowings","LineOfCredit","CommercialPaper","CurrentNotesPayable","PensionandOtherPostRetirementBenefitPlansCurrent","CurrentProvisions","PayablesAndAccruedExpenses","CurrentAccruedExpenses","InterestPayable","Payables","OtherPayable","DuetoRelatedPartiesCurrent","DividendsPayable","TotalTaxPayable","IncomeTaxPayable","AccountsPayable","Total
Assets","TotalNonCurrentAssets","OtherNonCurrentAssets","DefinedPensionBenefit","NonCurrentPrepaidAssets","NonCurrentDeferredAssets","NonCurrentDeferredTaxesAssets","DuefromRelatedPartiesNonCurrent","NonCurrentNoteReceivables","NonCurrentAccountsReceivable","FinancialAssets","InvestmentsAndAdvances","OtherInvestments","InvestmentinFinancialAssets","HeldToMaturitySecurities","AvailableForSaleSecurities","FinancialAssetsDesignatedasFairValueThroughProfitorLossTotal","TradingSecurities","LongTermEquityInvestment","InvestmentsinJointVenturesatCost","InvestmentsInOtherVenturesUnderEquityMethod","InvestmentsinAssociatesatCost","InvestmentsinSubsidiariesatCost","InvestmentProperties","GoodwillAndOtherIntangibleAssets","OtherIntangibleAssets","Goodwill","NetPPE","AccumulatedDepreciation","GrossPPE","Leases","ConstructionInProgress","OtherProperties","MachineryFurnitureEquipment","BuildingsAndImprovements","LandAndImprovements","Properties","CurrentAssets","OtherCurrentAssets","HedgingAssetsCurrent","AssetsHeldForSaleCurrent","CurrentDeferredAssets","CurrentDeferredTaxesAssets","RestrictedCash","PrepaidAssets","Inventory","InventoriesAdjustmentsAllowances","OtherInventories","FinishedGoods","WorkInProcess","RawMaterials","Receivables","ReceivablesAdjustmentsAllowances","OtherReceivables","DuefromRelatedPartiesCurrent","TaxesReceivable","AccruedInterestReceivable","NotesReceivable","LoansReceivable","AccountsReceivable","AllowanceForDoubtfulAccountsReceivable","GrossAccountsReceivable","CashCashEquivalentsAndShortTermInvestments","OtherShortTermInvestments","CashAndCashEquivalents","CashEquivalents","CashFinancial"]
fundamentals_keys['cash-flow'] = ["ForeignSales","DomesticSales","AdjustedGeographySegmentData","FreeCashFlow","RepurchaseOfCapitalStock","RepaymentOfDebt","IssuanceOfDebt","IssuanceOfCapitalStock","CapitalExpenditure","InterestPaidSupplementalData","IncomeTaxPaidSupplementalData","EndCashPosition","OtherCashAdjustmentOutsideChangeinCash","BeginningCashPosition","EffectOfExchangeRateChanges","ChangesInCash","OtherCashAdjustmentInsideChangeinCash","CashFlowFromDiscontinuedOperation","FinancingCashFlow","CashFromDiscontinuedFinancingActivities","CashFlowFromContinuingFinancingActivities","NetOtherFinancingCharges","InterestPaidCFF","ProceedsFromStockOptionExercised","CashDividendsPaid","PreferredStockDividendPaid","CommonStockDividendPaid","NetPreferredStockIssuance","PreferredStockPayments","PreferredStockIssuance","NetCommonStockIssuance","CommonStockPayments","CommonStockIssuance","NetIssuancePaymentsOfDebt","NetShortTermDebtIssuance","ShortTermDebtPayments","ShortTermDebtIssuance","NetLongTermDebtIssuance","LongTermDebtPayments","LongTermDebtIssuance","InvestingCashFlow","CashFromDiscontinuedInvestingActivities","CashFlowFromContinuingInvestingActivities","NetOtherInvestingChanges","InterestReceivedCFI","DividendsReceivedCFI","NetInvestmentPurchaseAndSale","SaleOfInvestment","PurchaseOfInvestment","NetInvestmentPropertiesPurchaseAndSale","SaleOfInvestmentProperties","PurchaseOfInvestmentProperties","NetBusinessPurchaseAndSale","SaleOfBusiness","PurchaseOfBusiness","NetIntangiblesPurchaseAndSale","SaleOfIntangibles","PurchaseOfIntangibles","NetPPEPurchaseAndSale","SaleOfPPE","PurchaseOfPPE","CapitalExpenditureReported","OperatingCashFlow","CashFromDiscontinuedOperatingActivities","CashFlowFromContinuingOperatingActivities","TaxesRefundPaid","InterestReceivedCFO","InterestPaidCFO","DividendReceivedCFO","DividendPaidCFO","ChangeInWorkingCapital","ChangeInOtherWorkingCapital","ChangeInOtherCurrentLiabilities","ChangeInOtherCurrentAssets","ChangeInPayablesAndAccruedExp
ense","ChangeInAccruedExpense","ChangeInInterestPayable","ChangeInPayable","ChangeInDividendPayable","ChangeInAccountPayable","ChangeInTaxPayable","ChangeInIncomeTaxPayable","ChangeInPrepaidAssets","ChangeInInventory","ChangeInReceivables","ChangesInAccountReceivables","OtherNonCashItems","ExcessTaxBenefitFromStockBasedCompensation","StockBasedCompensation","UnrealizedGainLossOnInvestmentSecurities","ProvisionandWriteOffofAssets","AssetImpairmentCharge","AmortizationOfSecurities","DeferredTax","DeferredIncomeTax","DepreciationAmortizationDepletion","Depletion","DepreciationAndAmortization","AmortizationCashFlow","AmortizationOfIntangibles","Depreciation","OperatingGainsLosses","PensionAndEmployeeBenefitExpense","EarningsLossesFromEquityInvestments","GainLossOnInvestmentSecurities","NetForeignCurrencyExchangeGainLoss","GainLossOnSaleOfPPE","GainLossOnSaleOfBusiness","NetIncomeFromContinuingOperations","CashFlowsfromusedinOperatingActivitiesDirect","TaxesRefundPaidDirect","InterestReceivedDirect","InterestPaidDirect","DividendsReceivedDirect","DividendsPaidDirect","ClassesofCashPayments","OtherCashPaymentsfromOperatingActivities","PaymentsonBehalfofEmployees","PaymentstoSuppliersforGoodsandServices","ClassesofCashReceiptsfromOperatingActivities","OtherCashReceiptsfromOperatingActivities","ReceiptsfromGovernmentGrants","ReceiptsfromCustomers"]

View File

@ -2,30 +2,14 @@ import functools
from functools import lru_cache
import logging
import hashlib
from base64 import b64decode
usePycryptodome = False # slightly faster
# usePycryptodome = True
if usePycryptodome:
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
else:
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
import requests as requests
import re
from bs4 import BeautifulSoup
import random
import time
from frozendict import frozendict
try:
import ujson as json
except ImportError:
import json as json
from . import utils
cache_maxsize = 64
@ -52,127 +36,6 @@ def lru_cache_freezeargs(func):
return wrapped
def _extract_extra_keys_from_stores(data):
    """Return the distinct values of the non-standard top-level entries of *data*.

    Every key except "context" and "plugins" is considered. Values are
    returned in first-seen order with duplicates dropped (several keys may
    carry the same value).

    :Parameters:
        data: dict
            Parsed page JSON; the considered values must be hashable.
    :Returns:
        list
            De-duplicated values, in order of first appearance.
    """
    # dict.fromkeys keeps insertion order, so it de-duplicates while
    # preserving the order in which each value first appears.
    return list(dict.fromkeys(
        value for key, value in data.items()
        if key not in ("context", "plugins")
    ))
def decrypt_cryptojs_aes_stores(data, keys=None):
    """Decrypt the CryptoJS/OpenSSL-style AES payload embedded in a Yahoo page.

    The payload is read from ``data['context']['dispatcher']['stores']``. It is
    base64 text that decodes to ``b"Salted__"`` + 8 salt bytes + AES-CBC
    ciphertext. The AES key and IV are derived from a password with the
    OpenSSL EVP key-derivation function (MD5, one iteration).

    :Parameters:
        data: dict
            Parsed ``root.App.main`` JSON from the page.
        keys: list, optional
            Candidate passwords, tried in order. When omitted, a single
            password is derived from ``data['_cs']`` / ``data['_cr']`` if
            both are present.
    :Returns:
        The JSON-decoded decrypted stores.
    :Raises:
        TypeError: ``keys`` is given but is not a list.
        ValueError: the payload does not start with the ``Salted__`` header.
        Exception: no password could decrypt the payload.
    """
    encrypted_stores = data['context']['dispatcher']['stores']

    password = None
    candidate_passwords = []
    if keys is not None:
        if not isinstance(keys, list):
            raise TypeError("'keys' must be list")
        candidate_passwords = keys
    elif "_cs" in data and "_cr" in data:
        # "_cr" holds a JSON object whose "words" are signed 32-bit integers;
        # packed big-endian they form the PBKDF2 salt for the "_cs" secret.
        salt_words = json.loads(data["_cr"])["words"]
        pbkdf2_salt = b"".join(
            int.to_bytes(word, length=4, byteorder="big", signed=True) for word in salt_words)
        password = hashlib.pbkdf2_hmac("sha1", data["_cs"].encode("utf8"), pbkdf2_salt, 1, dklen=32).hex()

    encrypted_stores = b64decode(encrypted_stores)
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if encrypted_stores[0:8] != b"Salted__":
        raise ValueError("Encrypted stores are missing the 'Salted__' header")
    salt = encrypted_stores[8:16]
    encrypted_stores = encrypted_stores[16:]

    def _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") -> tuple:
        """OpenSSL EVP Key Derivation Function
        Args:
            password (Union[str, bytes, bytearray]): Password to generate key from.
            salt (Union[bytes, bytearray]): Salt to use.
            keySize (int, optional): Output key length in bytes. Defaults to 32.
            ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16.
            iterations (int, optional): Number of iterations to perform. Defaults to 1.
            hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'.
        Returns:
            key, iv: Derived key and Initialization Vector (IV) bytes.
        Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3
        OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78
        """
        if iterations <= 0:
            raise ValueError("Iterations can not be less than 1.")

        if isinstance(password, str):
            password = password.encode("utf-8")

        final_length = keySize + ivSize
        key_iv = b""
        block = None

        # Each round hashes (previous block + password + salt); rounds are
        # concatenated until enough bytes exist for both key and IV.
        while len(key_iv) < final_length:
            hasher = hashlib.new(hashAlgorithm)
            if block:
                hasher.update(block)
            hasher.update(password)
            hasher.update(salt)
            block = hasher.digest()
            for _ in range(1, iterations):
                block = hashlib.new(hashAlgorithm, block).digest()
            key_iv += block

        return key_iv[:keySize], key_iv[keySize:final_length]

    def _decrypt(ciphertext, key, iv):
        """AES-CBC decrypt *ciphertext*, strip PKCS7 padding and decode as UTF-8."""
        if usePycryptodome:
            cipher = AES.new(key, AES.MODE_CBC, iv=iv)
            plaintext = cipher.decrypt(ciphertext)
            plaintext = unpad(plaintext, 16, style="pkcs7")
        else:
            cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
            decryptor = cipher.decryptor()
            plaintext = decryptor.update(ciphertext) + decryptor.finalize()
            unpadder = padding.PKCS7(128).unpadder()
            plaintext = unpadder.update(plaintext) + unpadder.finalize()
        return plaintext.decode("utf-8")

    if password is not None:
        try:
            key, iv = _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
        except Exception as err:
            raise Exception("yfinance failed to decrypt Yahoo data response") from err
        plaintext = _decrypt(encrypted_stores, key, iv)
    else:
        plaintext = None
        for candidate in candidate_passwords:
            try:
                key, iv = _EVPKDF(candidate, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
                plaintext = _decrypt(encrypted_stores, key, iv)
                break
            except Exception:
                # A wrong password shows up as a padding or UTF-8 decode
                # failure; move on to the next candidate.
                continue
        if plaintext is None:
            raise Exception("yfinance failed to decrypt Yahoo data response")

    return json.loads(plaintext)
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
class TickerData:
"""
Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
@ -211,125 +74,3 @@ class TickerData:
response = self.get(url, user_agent_headers=user_agent_headers, params=params, proxy=proxy, timeout=timeout)
response.raise_for_status()
return response.json()
def _get_decryption_keys_from_yahoo_js(self, soup):
    """Build the candidate decryption password from the page's ``root.App.main`` data.

    Two strategies are tried in order:
      1) if exactly four top-level keys follow "plugins" in the
         ``root.App.main`` object, join their values;
      2) otherwise scan the page's ``main.*.js`` scripts for the four key
         names used next to ``context.dispatcher.stores`` and join the
         values stored under those names.

    :Parameters:
        soup: BeautifulSoup
            Parsed HTML of the quote page.
    :Returns:
        list
            A one-element list holding the joined password, or an empty
            list when no complete set of keys was found.
    """
    key_count = 4

    script_tag = soup.find("script", string=re.compile("root.App.main"))
    if script_tag is None:
        # Page carries no data-store script at all: nothing to extract.
        return []
    match = re.search(r"root.App.main\s+=\s+(\{.*\})", script_tag.text)
    if match is None:
        return []
    re_data = json.loads(match.group(1))
    re_data.pop("context", None)

    def _join_values(names):
        """Join the values stored under *names*; None if any value is missing or empty."""
        collected = {}
        for name in names:
            value = re_data.get(name)
            if not value:
                return None
            collected[name] = value
        return ''.join(collected.values())

    if re_data.get("plugins"):  # 1) attempt to get last 4 keys after plugins
        key_list = list(re_data.keys())
        sub_keys = key_list[key_list.index("plugins") + 1:]
        if len(sub_keys) == key_count:
            joined = _join_values(sub_keys)
            if joined is not None:
                return [joined]

    re_keys = []  # 2) attempt scan main.js file approach to get keys
    prefix = "https://s.yimg.com/uc/finance/dd-site/js/main."
    tags = [tag['src'] for tag in soup.find_all('script') if prefix in tag.get('src', '')]
    for t in tags:
        response_js = self.cache_get(t)
        if response_js.status_code != 200:
            # Back off for a random 10-19 seconds before trying the next script.
            time.sleep(random.randrange(10, 20))
            response_js.close()
        else:
            r_data = response_js.content.decode("utf8")
            re_list = [
                x.group() for x in re.finditer(r"context.dispatcher.stores=JSON.parse((?:.*?\r?\n?)*)toString", r_data)
            ]
            for rl in re_list:
                re_sublist = [x.group() for x in re.finditer(r"t\[\"((?:.*?\r?\n?)*)\"\]", rl)]
                if len(re_sublist) == key_count:
                    re_keys = [sl.replace('t["', '').replace('"]', '') for sl in re_sublist]
                    break
            response_js.close()
        if len(re_keys) == key_count:
            break

    if len(re_keys) > 0:
        joined = _join_values(re_keys)
        if joined is not None:
            return [joined]

    return []
@utils.log_indent_decorator
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
    """
    Return the data stores embedded in a Yahoo Finance quote web page as a dict.

    :Parameters:
        sub_page: str
            Optional. Sub-page of the quote page to fetch (appended to the URL).
        proxy: str
            Optional. Passed through to the page request.
    :Returns:
        dict of the decoded stores, with ``{"raw": x, ...}`` objects
        collapsed to ``x`` and empty objects replaced by None. An empty dict
        is returned when the page carries no ``root.App.main`` assignment.
    :Raises:
        Exception: when no stores could be extracted or decrypted.
    """
    if sub_page:
        ticker_url = "{}/{}/{}".format(_SCRAPE_URL_, self.ticker, sub_page)
    else:
        ticker_url = "{}/{}".format(_SCRAPE_URL_, self.ticker)

    response = self.get(url=ticker_url, proxy=proxy)
    html = response.text

    # The actual json-data for stores is in a javascript assignment in the webpage
    try:
        json_str = html.split('root.App.main =')[1].split(
            '(this)')[0].split(';\n}')[0].strip()
    except IndexError:
        # Fetch failed, probably because Yahoo spam triggered
        return {}

    data = json.loads(json_str)

    # Gather decryption keys:
    soup = BeautifulSoup(response.content, "html.parser")
    keys = self._get_decryption_keys_from_yahoo_js(soup)
    if len(keys) == 0:
        msg = "No decryption keys could be extracted from JS file."
        if "requests_cache" in str(type(response)):
            msg += " Try flushing your 'requests_cache', probably parsing old JS."
        utils.get_yf_logger().warning("%s Falling back to backup decrypt methods.", msg)

        # Backup 1: join the last four extra top-level values of the page data.
        keys = []
        try:
            extra_keys = _extract_extra_keys_from_stores(data)
            keys = [''.join(extra_keys[-4:])]
        except Exception as err:
            # Best-effort only; the key list fetched below is still tried.
            utils.get_yf_logger().debug("Could not derive key from page data: %r", err)

        # Backup 2: append the key list hosted in the project repository.
        keys_url = "https://github.com/ranaroussi/yfinance/raw/main/yfinance/scrapers/yahoo-keys.txt"
        response_gh = self.cache_get(keys_url)
        keys += response_gh.text.splitlines()

    # Decrypt!
    stores = decrypt_cryptojs_aes_stores(data, keys)
    if stores is None:
        # Maybe Yahoo returned old format, not encrypted
        if "context" in data and "dispatcher" in data["context"]:
            stores = data['context']['dispatcher']['stores']
    if stores is None:
        raise Exception(f"{self.ticker}: Failed to extract data stores from web request")

    # Round-trip through text so the substitutions apply at every nesting level.
    new_data = json.dumps(stores).replace('{}', 'null')
    new_data = re.sub(
        r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)

    return json.loads(new_data)

View File

@ -4,3 +4,9 @@ class YFinanceException(Exception):
class YFinanceDataException(YFinanceException):
pass
class YFNotImplementedError(NotImplementedError):
    """Signals that fetching the named item from the Yahoo API has not been implemented."""

    def __init__(self, method_name):
        message = "Have not implemented fetching '{}' from Yahoo API".format(method_name)
        super().__init__(message)

View File

@ -2,6 +2,7 @@ import pandas as pd
from yfinance import utils
from yfinance.data import TickerData
from yfinance.exceptions import YFNotImplementedError
class Analysis:
@ -20,100 +21,29 @@ class Analysis:
@property
def earnings_trend(self) -> pd.DataFrame:
if self._earnings_trend is None:
self._scrape(self.proxy)
raise YFNotImplementedError('earnings_trend')
return self._earnings_trend
@property
def analyst_trend_details(self) -> pd.DataFrame:
if self._analyst_trend_details is None:
self._scrape(self.proxy)
raise YFNotImplementedError('analyst_trend_details')
return self._analyst_trend_details
@property
def analyst_price_target(self) -> pd.DataFrame:
if self._analyst_price_target is None:
self._scrape(self.proxy)
raise YFNotImplementedError('analyst_price_target')
return self._analyst_price_target
@property
def rev_est(self) -> pd.DataFrame:
if self._rev_est is None:
self._scrape(self.proxy)
raise YFNotImplementedError('rev_est')
return self._rev_est
@property
def eps_est(self) -> pd.DataFrame:
if self._eps_est is None:
self._scrape(self.proxy)
raise YFNotImplementedError('eps_est')
return self._eps_est
@utils.log_indent_decorator
def _scrape(self, proxy):
    """Fetch the "analysis" page stores once and populate the analysis attributes.

    Sets ``_earnings_trend``, ``_analyst_trend_details``,
    ``_analyst_price_target``, ``_rev_est`` and ``_eps_est``. Repeated calls
    are no-ops. If the page has no ``QuoteSummaryStore`` an error is logged
    and nothing is populated.

    :Parameters:
        proxy: passed through to the data-store fetch.
    """
    if self._already_scraped:
        return
    self._already_scraped = True

    # Analysis Data/Analyst Forecasts
    analysis_data = self._data.get_json_data_stores("analysis", proxy=proxy)
    try:
        analysis_data = analysis_data['QuoteSummaryStore']
    except KeyError:
        err_msg = "No analysis data found, symbol may be delisted"
        utils.get_yf_logger().error('%s: %s', self._data.ticker, err_msg)
        return

    earnings_trend = analysis_data.get('earningsTrend')
    if isinstance(earnings_trend, dict):
        try:
            analysis = pd.DataFrame(earnings_trend['trend'])
            analysis['endDate'] = pd.to_datetime(analysis['endDate'])
            analysis.set_index('period', inplace=True)
            analysis.index = analysis.index.str.upper()
            analysis.index.name = 'Period'
            analysis.columns = utils.camel2title(analysis.columns)

            # Flatten dict-valued cells into "<Column> <Sub Key>" columns,
            # then drop the original dict-valued columns.
            dict_cols = []
            for idx, row in analysis.iterrows():
                for colname, colval in row.items():
                    if isinstance(colval, dict):
                        dict_cols.append(colname)
                        for k, v in colval.items():
                            new_colname = colname + ' ' + \
                                utils.camel2title([k])[0]
                            analysis.loc[idx, new_colname] = v

            self._earnings_trend = analysis[[
                c for c in analysis.columns if c not in dict_cols]]
        except Exception as err:
            # Best-effort: leave _earnings_trend unset but record why.
            utils.get_yf_logger().debug(
                '%s: failed to parse earnings trend: %r', self._data.ticker, err)

    try:
        self._analyst_trend_details = pd.DataFrame(analysis_data['recommendationTrend']['trend'])
    except Exception:
        self._analyst_trend_details = None
    try:
        self._analyst_price_target = pd.DataFrame(analysis_data['financialData'], index=[0])[
            ['targetLowPrice', 'currentPrice', 'targetMeanPrice', 'targetHighPrice', 'numberOfAnalystOpinions']].T
    except Exception:
        self._analyst_price_target = None

    # The estimates come from the earnings trend. It may be absent even when
    # the recommendation trend parsed, so look it up defensively rather than
    # indexing it directly.
    trend_rows = []
    if isinstance(earnings_trend, dict):
        trend_rows = earnings_trend.get('trend') or []

    earnings_estimate = []
    revenue_estimate = []
    if self._analyst_trend_details is not None:
        for key in trend_rows:
            try:
                # Build new dicts so the (cached) store data is not mutated.
                earnings_estimate.append(
                    {**key['earningsEstimate'], 'period': key['period'], 'endDate': key['endDate']})
                revenue_estimate.append(
                    {**key['revenueEstimate'], 'period': key['period'], 'endDate': key['endDate']})
            except (KeyError, TypeError):
                # Skip trend rows lacking estimate data.
                continue
        self._rev_est = pd.DataFrame(revenue_estimate)
        self._eps_est = pd.DataFrame(earnings_estimate)
    else:
        self._rev_est = pd.DataFrame()
        self._eps_est = pd.DataFrame()

View File

@ -5,9 +5,9 @@ import json
import pandas as pd
import numpy as np
from yfinance import utils
from yfinance import utils, const
from yfinance.data import TickerData
from yfinance.exceptions import YFinanceDataException, YFinanceException
from yfinance.exceptions import YFinanceException, YFNotImplementedError
class Fundamentals:
@ -31,72 +31,15 @@ class Fundamentals:
@property
def earnings(self) -> dict:
if self._earnings is None:
self._scrape_earnings(self.proxy)
raise YFNotImplementedError('earnings')
return self._earnings
@property
def shares(self) -> pd.DataFrame:
if self._shares is None:
self._scrape_shares(self.proxy)
raise YFNotImplementedError('shares')
return self._shares
@utils.log_indent_decorator
def _scrape_basics(self, proxy):
    """Fetch the 'financials' page stores once and keep its QuoteSummaryStore.

    Stores the full result on ``_financials_data`` and, when present, its
    ``QuoteSummaryStore`` entry on ``_fin_data_quote``. Logs an error if that
    entry is missing. Later calls do nothing.
    """
    if not self._basics_already_scraped:
        self._basics_already_scraped = True

        stores = self._data.get_json_data_stores('financials', proxy)
        self._financials_data = stores
        try:
            quote_summary = stores['QuoteSummaryStore']
        except KeyError:
            utils.get_yf_logger().error(
                '%s: %s', self._data.ticker,
                "No financials data found, symbol may be delisted")
        else:
            self._fin_data_quote = quote_summary
def _scrape_earnings(self, proxy):
    """Populate ``_earnings`` with yearly and quarterly earnings tables.

    ``_earnings`` always ends up with "yearly" and "quarterly" DataFrames
    (empty when data is unavailable). When the quote summary has an
    'earnings' dict, its 'financialsChart' tables are parsed and a
    "financialCurrency" entry is added (default 'USD'). Parsing is
    best-effort: on any error, whatever was filled in so far is kept.
    """
    self._scrape_basics(proxy)

    self._earnings = {"yearly": pd.DataFrame(), "quarterly": pd.DataFrame()}
    quote = self._fin_data_quote
    if quote is None or not isinstance(quote.get('earnings'), dict):
        return

    try:
        raw = quote['earnings']
        chart = raw['financialsChart']
        currency = raw.get('financialCurrency', 'USD')
        chart['financialCurrency'] = currency
        self._earnings['financialCurrency'] = currency

        for period, index_name in (('yearly', 'Year'), ('quarterly', 'Quarter')):
            table = pd.DataFrame(chart[period]).set_index('date')
            table.columns = utils.camel2title(table.columns)
            table.index.name = index_name
            self._earnings[period] = table
    except Exception:
        pass
def _scrape_shares(self, proxy):
    """Populate ``_shares`` with the yearly basic average share count.

    Reads ``QuoteTimeSeriesStore -> timeSeries -> annualBasicAverageShares``
    from the scraped financials data and builds a DataFrame indexed by
    'Year' with a single "BasicShares" column. Best-effort: if the data is
    missing or malformed, ``_shares`` is left unchanged.
    """
    self._scrape_basics(proxy)
    # shares outstanding
    try:
        series = self._financials_data['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
        # keep only years with non None data
        available_shares = [entry for entry in series if entry]
        shares = pd.DataFrame(available_shares)
        # Series.map is explicitly element-wise; Series.agg with a
        # per-element callable is deprecated in recent pandas.
        shares['Year'] = shares['asOfDate'].map(lambda as_of: int(as_of[:4]))
        shares.set_index('Year', inplace=True)
        shares.drop(columns=['dataId', 'asOfDate',
                             'periodType', 'currencyCode'], inplace=True)
        shares.rename(
            columns={'reportedValue': "BasicShares"}, inplace=True)
        self._shares = shares
    except Exception:
        # Deliberately best-effort: share data is optional.
        pass
class Financials:
def __init__(self, data: TickerData):
@ -104,9 +47,6 @@ class Financials:
self._income_time_series = {}
self._balance_sheet_time_series = {}
self._cash_flow_time_series = {}
self._income_scraped = {}
self._balance_sheet_scraped = {}
self._cash_flow_scraped = {}
def get_income_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._income_time_series
@ -154,37 +94,13 @@ class Financials:
# Yahoo stores the 'income' table internally under 'financials' key
name = "financials"
keys = self._get_datastore_keys(name, proxy)
keys = const.fundamentals_keys[name]
try:
return self.get_financials_time_series(timescale, keys, proxy)
except Exception as e:
pass
def _get_datastore_keys(self, sub_page, proxy) -> list:
    """Collect every "key" value found anywhere inside the page's FinancialTemplateStore.

    :Parameters:
        sub_page: str
            Sub-page whose data stores are fetched.
        proxy: passed through to the data-store fetch.
    :Returns:
        list of the values stored under "key", in depth-first document order.
    :Raises:
        YFinanceDataException: the store is missing or contains no keys.
    """
    data_stores = self._data.get_json_data_stores(sub_page, proxy)

    def _iter_key_values(node):
        """Yield node["key"] for every dict in the nested dict/list structure."""
        if isinstance(node, dict):
            if "key" in node:
                yield node["key"]
            children = node.values()
        elif isinstance(node, list):
            children = node
        else:
            return
        for child in children:
            yield from _iter_key_values(child)

    try:
        template_store = data_stores['FinancialTemplateStore']
    except KeyError as e:
        raise YFinanceDataException(
            "Parsing FinancialTemplateStore failed, reason: {}".format(repr(e))) from e

    keys = list(_iter_key_values(template_store))
    if not keys:
        raise YFinanceDataException("No keys in FinancialTemplateStore")
    return keys
def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.DataFrame:
timescale_translation = {"yearly": "annual", "quarterly": "quarterly"}
timescale = timescale_translation[timescale]
@ -233,90 +149,3 @@ class Financials:
df = df[sorted(df.columns, reverse=True)]
return df
def get_income_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the scraped income statement for *freq*, scraping it on first use.

    Results are cached per ``freq``; ``proxy`` only matters on the call that
    performs the scrape.
    """
    res = self._income_scraped
    if freq not in res:
        # Forward the caller's proxy (it was previously hard-coded to None).
        res[freq] = self._scrape("income", freq, proxy=proxy)
    return res[freq]
def get_balance_sheet_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the scraped balance sheet for *freq*, scraping it on first use.

    Results are cached per ``freq``; ``proxy`` only matters on the call that
    performs the scrape.
    """
    res = self._balance_sheet_scraped
    if freq not in res:
        # Forward the caller's proxy (it was previously hard-coded to None).
        res[freq] = self._scrape("balance-sheet", freq, proxy=proxy)
    return res[freq]
def get_cash_flow_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the scraped cash-flow statement for *freq*, scraping it on first use.

    Results are cached per ``freq``; ``proxy`` only matters on the call that
    performs the scrape.
    """
    res = self._cash_flow_scraped
    if freq not in res:
        # Forward the caller's proxy (it was previously hard-coded to None).
        res[freq] = self._scrape("cash-flow", freq, proxy=proxy)
    return res[freq]
@utils.log_indent_decorator
def _scrape(self, name, timescale, proxy=None):
    """Build a legacy financials table, falling back to an empty DataFrame.

    :Parameters:
        name: str
            One of "income", "balance-sheet", "cash-flow".
        timescale: str
            "yearly" or "quarterly".
        proxy: passed through to the table builder.
    :Returns:
        The statement DataFrame, or an empty DataFrame when the table could
        not be created (a YFinanceException is logged, not raised).
    :Raises:
        ValueError: ``name`` or ``timescale`` is not an allowed value.
    """
    # Backup in case _fetch_time_series() fails to return data
    allowed_names = ["income", "balance-sheet", "cash-flow"]
    allowed_timescales = ["yearly", "quarterly"]

    if name not in allowed_names:
        raise ValueError("Illegal argument: name must be one of: {}".format(allowed_names))
    if timescale not in allowed_timescales:
        # Report the allowed timescales here (the message used to list the names).
        raise ValueError("Illegal argument: timescale must be one of: {}".format(allowed_timescales))

    try:
        statement = self._create_financials_table_old(name, timescale, proxy)
        if statement is not None:
            return statement
    except YFinanceException as e:
        utils.get_yf_logger().error("%s: Failed to create financials table for %s reason: %r", self._data.ticker, name, e)
    return pd.DataFrame()
def _create_financials_table_old(self, name, timescale, proxy):
    """Tabulate a legacy financial statement from the scraped QuoteSummaryStore.

    :Parameters:
        name: str
            "cash-flow" or "balance-sheet"; any other value selects the
            income statement.
        timescale: str
            "quarterly" selects the quarterly history, anything else the
            yearly one.
        proxy: passed through to the data-store fetch.
    :Returns:
        DataFrame with one row per line item (first letter upper-cased) and
        one column per statement end date.
    :Raises:
        YFinanceDataException: the legacy data is missing or empty.
    """
    data_stores = self._data.get_json_data_stores("financials", proxy)

    # Fetch raw data
    if "QuoteSummaryStore" not in data_stores:
        raise YFinanceDataException("Yahoo not returning legacy financials data")
    data = data_stores["QuoteSummaryStore"]

    # (history-container prefix, statements-list key) for each statement type
    statement_keys = {
        "cash-flow": ("cashflowStatement", "cashflowStatements"),
        "balance-sheet": ("balanceSheet", "balanceSheetStatements"),
    }
    key1, key2 = statement_keys.get(name, ("incomeStatement", "incomeStatementHistory"))
    key1 += "History"
    if timescale == "quarterly":
        key1 += "Quarterly"
    if key1 not in data or data[key1] is None or key2 not in data[key1]:
        raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
    data = data[key1][key2]

    # Tabulate
    df = pd.DataFrame(data)
    if len(df) == 0:
        raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
    df = df.drop(columns=['maxAge'])
    # '-' marks a missing value in the scraped data
    for col in df.columns:
        df[col] = df[col].replace('-', np.nan)
    df.set_index('endDate', inplace=True)
    try:
        df.index = pd.to_datetime(df.index, unit='s')
    except ValueError:
        # Not epoch seconds; let pandas infer the date format.
        df.index = pd.to_datetime(df.index)
    df = df.T
    df.columns.name = ''
    df.index.name = 'Breakdown'
    # rename incorrect yahoo key
    df.rename(index={'treasuryStock': 'gainsLossesNotAffectingRetainedEarnings'}, inplace=True)

    # Upper-case first letter, leave rest unchanged:
    df.index = [s[0].upper() + s[1:] for s in df.index]

    return df

View File

@ -8,6 +8,7 @@ import numpy as _np
from yfinance import utils
from yfinance.data import TickerData
from yfinance.exceptions import YFNotImplementedError
info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"}
info_retired_keys_price.update({"regularMarket"+s for s in ["DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume"]})
@ -19,8 +20,6 @@ info_retired_keys_symbol = {"symbol"}
info_retired_keys = info_retired_keys_price | info_retired_keys_exchange | info_retired_keys_marketCap | info_retired_keys_symbol
PRUNE_INFO = True
# PRUNE_INFO = False
_BASIC_URL_ = "https://query2.finance.yahoo.com/v10/finance/quoteSummary"
@ -292,9 +291,9 @@ class FastInfo:
return self._shares
shares = self._tkr.get_shares_full(start=pd.Timestamp.utcnow().date()-pd.Timedelta(days=548))
if shares is None:
# Requesting 18 months failed, so fallback to shares which should include last year
shares = self._tkr.get_shares()
# if shares is None:
# # Requesting 18 months failed, so fallback to shares which should include last year
# shares = self._tkr.get_shares()
if shares is not None:
if isinstance(shares, pd.DataFrame):
shares = shares[shares.columns[0]]
@ -561,9 +560,7 @@ class Quote:
@property
def info(self) -> dict:
if self._info is None:
# self._scrape(self.proxy) # decrypt broken
self._fetch(self.proxy)
self._fetch_complementary(self.proxy)
return self._info
@ -571,143 +568,21 @@ class Quote:
@property
def sustainability(self) -> pd.DataFrame:
if self._sustainability is None:
self._scrape(self.proxy)
raise YFNotImplementedError('sustainability')
return self._sustainability
@property
def recommendations(self) -> pd.DataFrame:
if self._recommendations is None:
self._scrape(self.proxy)
raise YFNotImplementedError('recommendations')
return self._recommendations
@property
def calendar(self) -> pd.DataFrame:
if self._calendar is None:
self._scrape(self.proxy)
raise YFNotImplementedError('calendar')
return self._calendar
@utils.log_indent_decorator
def _scrape(self, proxy):
# Populate self._sustainability, self._info, self._retired_info,
# self._calendar and self._recommendations from the 'QuoteSummaryStore'
# entry of self._data.get_json_data_stores(proxy=proxy).
# NOTE(review): this listing has lost its indentation and looks like a diff
# view, so block nesting cannot be confirmed from here and this method may be
# among the lines the commit removes - verify against the real file.
# Guard flag: the body below runs at most once per object.
if self._already_scraped:
return
self._already_scraped = True
# get info and sustainability
json_data = self._data.get_json_data_stores(proxy=proxy)
try:
quote_summary_store = json_data['QuoteSummaryStore']
except KeyError:
# Without a summary store nothing can be parsed: log an error and stop.
err_msg = "No summary info found, symbol may be delisted"
utils.get_yf_logger().error('%s: %s', self._data.ticker, err_msg)
return None
# sustainability
# Collect the scalar (non-dict, non-list) 'esgScores' entries into a
# one-column 'Value' frame. The index name is formatted from the ratingYear
# and ratingMonth values; those two rows and maxAge are then filtered out.
d = {}
try:
if isinstance(quote_summary_store.get('esgScores'), dict):
for item in quote_summary_store['esgScores']:
if not isinstance(quote_summary_store['esgScores'][item], (dict, list)):
d[item] = quote_summary_store['esgScores'][item]
s = pd.DataFrame(index=[0], data=d)[-1:].T
s.columns = ['Value']
s.index.name = '%.f-%.f' % (
s[s.index == 'ratingYear']['Value'].values[0],
s[s.index == 'ratingMonth']['Value'].values[0])
self._sustainability = s[~s.index.isin(
['maxAge', 'ratingYear', 'ratingMonth'])]
except Exception:
# Best effort: on any failure self._sustainability is not assigned.
pass
# info: merge each listed summary module (when it is a dict) into one flat dict.
self._info = {}
try:
items = ['summaryProfile', 'financialData', 'quoteType',
'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
for item in items:
if isinstance(quote_summary_store.get(item), dict):
self._info.update(quote_summary_store[item])
except Exception:
pass
# For ETFs, provide this valuable data: the top holdings of the ETF
try:
if 'topHoldings' in quote_summary_store:
self._info.update(quote_summary_store['topHoldings'])
except Exception:
pass
try:
if not isinstance(quote_summary_store.get('summaryDetail'), dict):
# For some reason summaryDetail did not give any results. The price dict
# usually has most of the same info
self._info.update(quote_summary_store.get('price', {}))
except Exception:
pass
# regularMarketPrice: prefer the 'price' module value, else fall back to the
# already-collected regularMarketOpen (or None).
try:
# self._info['regularMarketPrice'] = self._info['regularMarketOpen']
self._info['regularMarketPrice'] = quote_summary_store.get('price', {}).get(
'regularMarketPrice', self._info.get('regularMarketOpen', None))
except Exception:
pass
# preMarketPrice: prefer the 'price' module value, else keep whatever is
# already in the info dict (or None).
try:
self._info['preMarketPrice'] = quote_summary_store.get('price', {}).get(
'preMarketPrice', self._info.get('preMarketPrice', None))
except Exception:
pass
# logo_url: built from the first word of shortName when there is no
# 'website' entry, otherwise from the website's domain with 'www.' removed.
# It stays "" if that derivation raises.
self._info['logo_url'] = ""
try:
if not 'website' in self._info:
self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % \
self._info['shortName'].split(' ')[0].split(',')[0]
else:
domain = self._info['website'].split(
'://')[1].split('/')[0].replace('www.', '')
self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
except Exception:
pass
# Delete redundant info[] keys, because values can be accessed faster
# elsewhere - e.g. price keys. The hope is that this reduces the Yahoo spam effect.
# But record the dropped keys, because in rare cases they are needed.
self._retired_info = {}
for k in info_retired_keys:
if k in self._info:
self._retired_info[k] = self._info[k]
if PRUNE_INFO:
del self._info[k]
if PRUNE_INFO:
# InfoDictWrapper will explain how to access above data elsewhere
self._info = InfoDictWrapper(self._info)
# events
# Earnings calendar: earningsDate is converted from epoch seconds, the frame
# is transposed, its index passed through utils.camel2title and its single
# column named 'Value'.
try:
cal = pd.DataFrame(quote_summary_store['calendarEvents']['earnings'])
cal['earningsDate'] = pd.to_datetime(
cal['earningsDate'], unit='s')
self._calendar = cal.T
self._calendar.index = utils.camel2title(self._calendar.index)
self._calendar.columns = ['Value']
except Exception as e:
# Failure is ignored; `e` is never used.
pass
# analyst recommendations
# Indexed by the grade date (epochGradeDate converted from epoch seconds,
# index renamed 'Date'); only the four selected columns are kept, sorted by
# index.
try:
rec = pd.DataFrame(
quote_summary_store['upgradeDowngradeHistory']['history'])
rec['earningsDate'] = pd.to_datetime(
rec['epochGradeDate'], unit='s')
rec.set_index('earningsDate', inplace=True)
rec.index.name = 'Date'
rec.columns = utils.camel2title(rec.columns)
self._recommendations = rec[[
'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
except Exception:
pass
def _fetch(self, proxy):
if self._already_fetched:
return

View File

@ -235,6 +235,10 @@ class Ticker(TickerBase):
def news(self):
    """Return the result of ``self.get_news()``."""
    return self.get_news()
@property
def trend_details(self) -> _pd.DataFrame:
return self.get_trend_details()
@property
def earnings_trend(self) -> _pd.DataFrame:
return self.get_earnings_trend()