Logging: improve appearance, fix propagation

Various important changes to yfinance logging:

Remove handler from YF logger, as bundling a handler breaks standard logging practice.

Improve 'download' log message grouping

Move custom DEBUG log formatting into 'yf.enable_debug_mode()', which if called adds the handler. The custom DEBUG log formatting is:
- 'MultiLineFormatter' improves appearance of multi-line messages in any logging mode. Adds leading indent to all lines below first, to align with the first line which is indented by %levelname%.
- Whitespace padding is added to the end of the %levelname% string up to 8 characters. This resolves the issue of different level strings (e.g. INFO & DEBUG) creating different indents. But this only automatically applies to the first line of a message - for multi-line messages, 'MultiLineFormatter' extracts the amount of padding and applies it to all other lines.
- Add leading indent proportional to function depth, because DEBUG generates a lot of messages particularly when repairing price data - same function can be called recursively. Implemented in 'IndentLoggerAdapter', a simple wrapper around the logger object.
- 'log_indent_decorator' inserts 'Entering/Exiting %function%' debug messages, which helps organise the output.
pull/1562/head
ValueRaider 2023-06-15 14:16:46 +01:00
parent 1a054135fb
commit 4e7b2094d0
8 changed files with 152 additions and 35 deletions

View File

@ -23,7 +23,7 @@ from . import version
from .ticker import Ticker
from .tickers import Tickers
from .multi import download
from .utils import set_tz_cache_location
from .utils import set_tz_cache_location, enable_debug_mode
__version__ = version.version
__author__ = "Ran Aroussi"
@ -43,4 +43,4 @@ def pdr_override():
pass
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'set_tz_cache_location']
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'enable_debug_mode', 'set_tz_cache_location']

View File

@ -45,7 +45,6 @@ from .scrapers.quote import Quote, FastInfo
import json as _json
import logging
logger = utils.get_yf_logger()
_BASE_URL_ = 'https://query2.finance.yahoo.com'
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
@ -91,6 +90,7 @@ class TickerBase:
data = self._data.get_json_data_stores(proxy=proxy)["QuoteSummaryStore"]
return data
@utils.log_indent_decorator
def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False, actions=True,
auto_adjust=True, back_adjust=False, repair=False, keepna=False,
@ -142,6 +142,7 @@ class TickerBase:
raise_errors: bool
If True, then raise errors as Exceptions instead of logging.
"""
logger = utils.get_yf_logger()
if debug is not None:
if debug:
@ -456,15 +457,17 @@ class TickerBase:
# ------------------------
@utils.log_indent_decorator
def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1, silent=False):
# Reconstruct values in df using finer-grained price data. Delimiter marks what to reconstruct
logger = utils.get_yf_logger()
if not isinstance(df, _pd.DataFrame):
raise Exception("'df' must be a Pandas DataFrame not", type(df))
if interval == "1m":
# Can't go smaller than 1m so can't reconstruct
return df
# Reconstruct values in df using finer-grained price data. Delimiter marks what to reconstruct
if interval[1:] in ['d', 'wk', 'mo']:
# Interday data always includes pre & post
prepost = True
@ -784,10 +787,12 @@ class TickerBase:
return df_v2
@utils.log_indent_decorator
def _fix_unit_mixups(self, df, interval, tz_exchange, prepost, silent=False):
# Sometimes Yahoo returns few prices in cents/pence instead of $/£
# I.e. 100x bigger
# Easy to detect and fix, just look for outliers = ~100x local median
logger = utils.get_yf_logger()
if df.shape[0] == 0:
if not "Repaired?" in df.columns:
@ -911,11 +916,14 @@ class TickerBase:
return df2
@utils.log_indent_decorator
def _fix_zeroes(self, df, interval, tz_exchange, prepost, silent=False):
# Sometimes Yahoo returns prices=0 or NaN when trades occurred.
# But most times when prices=0 or NaN returned is because no trades.
# Impossible to distinguish, so only attempt repair if few or rare.
logger = utils.get_yf_logger()
if df.shape[0] == 0:
if not "Repaired?" in df.columns:
df["Repaired?"] = False
@ -1030,9 +1038,12 @@ class TickerBase:
self._tz = tz
return tz
@utils.log_indent_decorator
def _fetch_ticker_tz(self, proxy, timeout):
# Query Yahoo for fast price data just to get returned timezone
logger = utils.get_yf_logger()
params = {"range": "1d", "interval": "1d"}
# Getting data from json
@ -1337,7 +1348,10 @@ class TickerBase:
return data.to_dict()
return data
@utils.log_indent_decorator
def get_shares_full(self, start=None, end=None, proxy=None):
logger = utils.get_yf_logger()
# Process dates
tz = self._get_ticker_tz(proxy=None, timeout=10)
dt_now = _pd.Timestamp.utcnow().tz_convert(tz)
@ -1445,6 +1459,7 @@ class TickerBase:
self._news = data.get("news", [])
return self._news
@utils.log_indent_decorator
def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]:
"""
Get earning dates (future and historic)
@ -1458,6 +1473,8 @@ class TickerBase:
if self._earnings_dates and limit in self._earnings_dates:
return self._earnings_dates[limit]
logger = utils.get_yf_logger()
page_size = min(limit, 100) # YF caps at 100, don't go higher
page_offset = 0
dates = None

View File

@ -30,8 +30,6 @@ from . import utils
cache_maxsize = 64
logger = utils.get_yf_logger()
def lru_cache_freezeargs(func):
"""
@ -275,6 +273,7 @@ class TickerData:
return []
@utils.log_indent_decorator
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
@ -306,7 +305,7 @@ class TickerData:
msg = "No decryption keys could be extracted from JS file."
if "requests_cache" in str(type(response)):
msg += " Try flushing your 'requests_cache', probably parsing old JS."
logger.warning("%s Falling back to backup decrypt methods.", msg)
utils.get_yf_logger().warning("%s Falling back to backup decrypt methods.", msg)
if len(keys) == 0:
keys = []
try:

View File

@ -30,6 +30,7 @@ import pandas as _pd
from . import Ticker, utils
from . import shared
@utils.log_indent_decorator
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=None,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
progress=True, period="max", show_errors=None, interval="1d", prepost=False,
@ -85,14 +86,26 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
session: None or Session
Optional. Pass your own session object to be used for all requests
"""
logger = utils.get_yf_logger()
if show_errors is not None:
if show_errors:
utils.print_once(f"yfinance: download(show_errors={show_errors}) argument is deprecated and will be removed in future version. Do this instead: logging.getLogger('yfinance').setLevel(logging.ERROR)")
logging.getLogger('yfinance').setLevel(logging.ERROR)
logger.setLevel(logging.ERROR)
else:
utils.print_once(f"yfinance: download(show_errors={show_errors}) argument is deprecated and will be removed in future version. Do this instead to suppress error messages: logging.getLogger('yfinance').setLevel(logging.CRITICAL)")
logging.getLogger('yfinance').setLevel(logging.CRITICAL)
logger.setLevel(logging.CRITICAL)
if logger.isEnabledFor(logging.DEBUG):
if threads:
# With DEBUG, each thread generates a lot of log messages.
# And with multi-threading, these messages will be interleaved, bad!
# So disable multi-threading to make log readable.
logger.debug('Disabling multithreading because DEBUG logging enabled')
threads = False
if progress:
# Disable progress bar, interferes with display of log messages
progress = False
if ignore_tz is None:
# Set default value depending on interval
@ -167,6 +180,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
errors = {}
for ticker in shared._ERRORS:
err = shared._ERRORS[ticker]
err = err.replace(f'{ticker}', '%ticker%')
if not err in errors:
errors[err] = [ticker]
else:
@ -178,6 +192,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
tbs = {}
for ticker in shared._TRACEBACKS:
tb = shared._TRACEBACKS[ticker]
tb = tb.replace(f'{ticker}', '%ticker%')
if not tb in tbs:
tbs[tb] = [ticker]
else:

View File

@ -47,6 +47,7 @@ class Analysis:
self._scrape(self.proxy)
return self._eps_est
@utils.log_indent_decorator
def _scrape(self, proxy):
if self._already_scraped:
return
@ -58,7 +59,7 @@ class Analysis:
analysis_data = analysis_data['QuoteSummaryStore']
except KeyError as e:
err_msg = "No analysis data found, symbol may be delisted"
logger.error('%s: %s', self._data.ticker, err_msg)
utils.get_yf_logger().error('%s: %s', self._data.ticker, err_msg)
return
if isinstance(analysis_data.get('earningsTrend'), dict):

View File

@ -9,8 +9,6 @@ from yfinance import utils
from yfinance.data import TickerData
from yfinance.exceptions import YFinanceDataException, YFinanceException
logger = utils.get_yf_logger()
class Fundamentals:
def __init__(self, data: TickerData, proxy=None):
@ -42,6 +40,7 @@ class Fundamentals:
self._scrape_shares(self.proxy)
return self._shares
@utils.log_indent_decorator
def _scrape_basics(self, proxy):
if self._basics_already_scraped:
return
@ -52,7 +51,7 @@ class Fundamentals:
self._fin_data_quote = self._financials_data['QuoteSummaryStore']
except KeyError:
err_msg = "No financials data found, symbol may be delisted"
logger.error('%s: %s', self._data.ticker, err_msg)
utils.get_yf_logger().error('%s: %s', self._data.ticker, err_msg)
return None
def _scrape_earnings(self, proxy):
@ -127,6 +126,7 @@ class Financials:
res[freq] = self._fetch_time_series("cash-flow", freq, proxy=None)
return res[freq]
@utils.log_indent_decorator
def _fetch_time_series(self, name, timescale, proxy=None):
# Fetching time series preferred over scraping 'QuoteSummaryStore',
# because it matches what Yahoo shows. But for some tickers returns nothing,
@ -146,7 +146,7 @@ class Financials:
if statement is not None:
return statement
except YFinanceException as e:
logger.error("%s: Failed to create %s financials table for reason: %r", self._data.ticker, name, e)
utils.get_yf_logger().error("%s: Failed to create %s financials table for reason: %r", self._data.ticker, name, e)
return pd.DataFrame()
def _create_financials_table(self, name, timescale, proxy):
@ -252,6 +252,7 @@ class Financials:
res[freq] = self._scrape("cash-flow", freq, proxy=None)
return res[freq]
@utils.log_indent_decorator
def _scrape(self, name, timescale, proxy=None):
# Backup in case _fetch_time_series() fails to return data
@ -269,7 +270,7 @@ class Financials:
if statement is not None:
return statement
except YFinanceException as e:
logger.error("%s: Failed to create financials table for %s reason: %r", self._data.ticker, name, e)
utils.get_yf_logger().error("%s: Failed to create financials table for %s reason: %r", self._data.ticker, name, e)
return pd.DataFrame()
def _create_financials_table_old(self, name, timescale, proxy):

View File

@ -9,8 +9,6 @@ import numpy as _np
from yfinance import utils
from yfinance.data import TickerData
logger = utils.get_yf_logger()
info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"}
info_retired_keys_price.update({"regularMarket"+s for s in ["DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume"]})
info_retired_keys_price.update({"fiftyTwoWeekLow", "fiftyTwoWeekHigh", "fiftyTwoWeekChange", "52WeekChange", "fiftyDayAverage", "twoHundredDayAverage"})
@ -179,10 +177,9 @@ class FastInfo:
def _get_1y_prices(self, fullDaysOnly=False):
if self._prices_1y is None:
# Temporarily disable error printing
l = logger.level
logger.setLevel(logging.CRITICAL)
logging.disable(logging.CRITICAL)
self._prices_1y = self._tkr.history(period="380d", auto_adjust=False, keepna=True)
logger.setLevel(l)
logging.disable(logging.NOTSET)
self._md = self._tkr.get_history_metadata()
try:
ctp = self._md["currentTradingPeriod"]
@ -209,19 +206,17 @@ class FastInfo:
def _get_1wk_1h_prepost_prices(self):
if self._prices_1wk_1h_prepost is None:
# Temporarily disable error printing
l = logger.level
logger.setLevel(logging.CRITICAL)
logging.disable(logging.CRITICAL)
self._prices_1wk_1h_prepost = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=True)
logger.setLevel(l)
logging.disable(logging.NOTSET)
return self._prices_1wk_1h_prepost
def _get_1wk_1h_reg_prices(self):
if self._prices_1wk_1h_reg is None:
# Temporarily disable error printing
l = logger.level
logger.setLevel(logging.CRITICAL)
logging.disable(logging.CRITICAL)
self._prices_1wk_1h_reg = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=False)
logger.setLevel(l)
logging.disable(logging.NOTSET)
return self._prices_1wk_1h_reg
def _get_exchange_metadata(self):
@ -593,6 +588,7 @@ class Quote:
self._scrape(self.proxy)
return self._calendar
@utils.log_indent_decorator
def _scrape(self, proxy):
if self._already_scraped:
return
@ -604,7 +600,7 @@ class Quote:
quote_summary_store = json_data['QuoteSummaryStore']
except KeyError:
err_msg = "No summary info found, symbol may be delisted"
logger.error('%s: %s', self._data.ticker, err_msg)
utils.get_yf_logger().error('%s: %s', self._data.ticker, err_msg)
return None
# sustainability

View File

@ -70,19 +70,107 @@ def print_once(msg):
print(msg)
## Logging
# Note: most of this logic is adding indentation with function depth,
# so that DEBUG log is readable.
class IndentLoggerAdapter(logging.LoggerAdapter):
    """Logger adapter that prefixes every line of a message with the
    current function-depth indent, but only when DEBUG logging is active.
    """

    def process(self, msg, kwargs):
        # Only pay the indentation cost when DEBUG output is enabled.
        if get_yf_logger().isEnabledFor(logging.DEBUG):
            prefix = ' ' * self.extra['indent']
            text = msg if isinstance(msg, str) else str(msg)
            # Indent every line, not just the first, so multi-line
            # messages stay visually grouped at the same depth.
            msg = '\n'.join(prefix + line for line in text.split('\n'))
        return msg, kwargs
import threading

# Per-thread indentation depth, so concurrent downloads don't corrupt
# each other's indent level.
_indentation_level = threading.local()


class IndentationContext:
    """Context manager that bumps the thread-local log indentation level
    on entry and restores it on exit."""

    def __init__(self, increment=1):
        # Number of indentation steps this context adds.
        self.increment = increment

    def __enter__(self):
        current = getattr(_indentation_level, 'indent', 0)
        _indentation_level.indent = current + self.increment

    def __exit__(self, exc_type, exc_val, exc_tb):
        _indentation_level.indent -= self.increment
def get_indented_logger(name=None):
    """Return a logger wrapped with the current thread-local indent.

    Never cache the returned value! The indent is captured at call time,
    so a cached adapter would freeze the indentation level.
    """
    depth = getattr(_indentation_level, 'indent', 0)
    return IndentLoggerAdapter(logging.getLogger(name), {'indent': depth})
def log_indent_decorator(func):
    """Decorator that logs 'Entering/Exiting func()' at DEBUG level and
    deepens the log indentation for the duration of the call.

    Helps organise the very chatty DEBUG output, particularly for
    functions called recursively (e.g. price-data repair).
    """
    import functools

    # functools.wraps preserves func's __name__/__doc__ on the wrapper;
    # without it, introspection and further decoration see 'wrapper'.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        logger = get_indented_logger('yfinance')
        logger.debug(f'Entering {func.__name__}()')
        # Everything logged inside the call is indented one step deeper.
        with IndentationContext():
            result = func(*args, **kwargs)
        logger.debug(f'Exiting {func.__name__}()')
        return result
    return wrapper
class MultiLineFormatter(logging.Formatter):
    """Formatter that aligns continuation lines of multi-line messages.

    The '%(levelname)-Ns' padding in 'fmt' only affects the first line of
    a record's message, so continuation lines are manually prefixed with
    an equivalent amount of whitespace.
    """

    def __init__(self, fmt):
        super().__init__(fmt)
        # Recover the levelname field width from the format string,
        # e.g. 8 from '%(levelname)-8s'. Zero means no fixed width.
        match = _re.search(r'%\(levelname\)-(\d+)s', fmt)
        self.level_length = int(match.group(1)) if match else 0

    def format(self, record):
        text = super().format(record)
        lines = text.split('\n')
        if len(lines) <= 1:
            return text
        # No fixed width in fmt -> fall back to the actual level string
        # at the start of the first formatted line.
        levelname = lines[0].split(' ')[0]
        width = self.level_length if self.level_length else len(levelname)
        pad = ' ' * (width + 1)  # +1 for the space between level and message
        return '\n'.join([lines[0]] + [pad + line for line in lines[1:]])
# Module-level logger state. 'yf_log_indented' flips to True once debug
# formatting has been set up (see setup_debug_formatting()).
yf_logger = None
yf_log_indented = False


def get_yf_logger():
    """Return the shared 'yfinance' logger.

    No handler is attached here - handler setup is left to the
    application (or to enable_debug_mode()), per standard logging
    practice. When debug indentation is active, an indent-aware adapter
    is returned instead - never cache it, as the indent changes with
    call depth.
    """
    global yf_logger
    if yf_logger is None:
        yf_logger = logging.getLogger('yfinance')
    if yf_log_indented:
        # Re-wrap on every call so the current indent is picked up.
        yf_logger = get_indented_logger('yfinance')
    return yf_logger
def setup_debug_formatting():
    """Install the custom multi-line DEBUG formatter and enable indentation.

    Warns and does nothing if the logger is not at DEBUG level. A stream
    handler is only added if the user has not already installed one.
    """
    global yf_logger
    yf_logger = get_yf_logger()

    if not yf_logger.isEnabledFor(logging.DEBUG):
        yf_logger.warning("logging mode not set to 'DEBUG', so not setting up debug formatting")
        return

    # Respect any handler the user already configured.
    if not yf_logger.handlers:
        handler = logging.StreamHandler()
        # Pad levelname to 8 chars so e.g. INFO & DEBUG indent identically.
        handler.setFormatter(MultiLineFormatter(fmt='%(levelname)-8s %(message)s'))
        yf_logger.addHandler(handler)

    global yf_log_indented
    yf_log_indented = True
def enable_debug_mode():
    """Switch yfinance logging to DEBUG with the custom formatting.

    Sets the 'yfinance' logger to DEBUG level, then installs the
    multi-line formatter and depth-based indentation.
    """
    logger = get_yf_logger()
    logger.setLevel(logging.DEBUG)
    setup_debug_formatting()
##
def is_isin(string):
return bool(_re.match("^([A-Z]{2})([A-Z0-9]{9})([0-9]{1})$", string))