complete rewrite
parent
bb6378a820
commit
7c1b2997e1
|
@ -1,6 +1,14 @@
|
|||
Change Log
|
||||
===========
|
||||
|
||||
0.1.0
|
||||
-------
|
||||
- Works with v8 API
|
||||
- Introduced Ticker module
|
||||
- Complete re-write of the entire code
|
||||
-
|
||||
- Skipped a bunch of versions :)
|
||||
|
||||
0.0.22
|
||||
-------
|
||||
- Deprecated Panel support
|
||||
|
|
142
README.rst
142
README.rst
|
@ -17,10 +17,6 @@ Yahoo! Finance Fix for Pandas Datareader
|
|||
:target: https://travis-ci.org/ranaroussi/fix-yahoo-finance
|
||||
:alt: Travis-CI build status
|
||||
|
||||
.. image:: https://img.shields.io/badge/Patreon-accepting-ff69b4.svg?style=flat
|
||||
:target: https://www.patreon.com/aroussi
|
||||
:alt: Patreon Status
|
||||
|
||||
.. image:: https://img.shields.io/github/stars/ranaroussi/fix-yahoo-finance.svg?style=social&label=Star&maxAge=60
|
||||
:target: https://github.com/ranaroussi/fix-yahoo-finance
|
||||
:alt: Star this repo
|
||||
|
@ -34,9 +30,9 @@ Yahoo! Finance Fix for Pandas Datareader
|
|||
`Yahoo! finance <https://ichart.finance.yahoo.com>`_ has decommissioned
|
||||
their historical data API, causing many programs that relied on it to stop working.
|
||||
|
||||
**fix-yahoo-finance** offers a **temporary fix** to the problem
|
||||
by scraping the data from Yahoo! finance using and return a Pandas
|
||||
DataFrame/Panel in the same format as **pandas_datareader**'s ``get_data_yahoo()``.
|
||||
**fix-yahoo-finance** fixes the problem by scraping the data from Yahoo! finance
|
||||
and returning a Pandas DataFrame in the same format as **pandas_datareader**'s
|
||||
``get_data_yahoo()``.
|
||||
|
||||
By basically "hijacking" ``pandas_datareader.data.get_data_yahoo()`` method,
|
||||
**fix-yahoo-finance**'s implementation is easy and only requires you to import
|
||||
|
@ -50,6 +46,78 @@ By basically "hijacking" ``pandas_datareader.data.get_data_yahoo()`` method,
|
|||
Quick Start
|
||||
===========
|
||||
|
||||
The Ticker module
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
The ``Ticker`` module allows you to access
|
||||
ticker data in a more Pythonic way:
|
||||
|
||||
.. code:: python
|
||||
|
||||
import fix_yahoo_finance as yf
|
||||
|
||||
msft = yf.Ticker("MSFT")
|
||||
|
||||
# get stock info
|
||||
msft.info
|
||||
|
||||
# get historical market data
|
||||
hist = msft.history(period="max", auto_adjust=True)
|
||||
|
||||
# show actions (dividends, splits)
|
||||
msft.actions
|
||||
|
||||
# show dividends
|
||||
msft.dividends
|
||||
|
||||
# show splits
|
||||
msft.splits
|
||||
|
||||
|
||||
Fetching data for multiple tickers
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code:: python
|
||||
|
||||
import fix_yahoo_finance as yf
|
||||
data = yf.download("SPY AAPL", start="2017-01-01", end="2017-04-30")
|
||||
|
||||
|
||||
I've also added some options to make life easier :)
|
||||
|
||||
.. code:: python
|
||||
|
||||
data = yf.download( # or pdr.get_data_yahoo(...
|
||||
# tickers list or string as well
|
||||
tickers = "SPY IWM TLT",
|
||||
|
||||
# use "period" instead of start/end
|
||||
# valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
|
||||
# (optional, default is '1mo')
|
||||
period = "ytd",
|
||||
|
||||
# fetch data by interval (including intraday if period < 60 days)
|
||||
# valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
|
||||
# (optional, default is '1d')
|
||||
interval = "1m",
|
||||
|
||||
# group by ticker (to access via data['SPY'])
|
||||
# (optional, default is 'column')
|
||||
group_by = 'ticker',
|
||||
|
||||
# adjust all OHLC automatically
|
||||
# (optional, default is False)
|
||||
auto_adjust = True,
|
||||
|
||||
# download pre/post regular market hours data
|
||||
# (optional, default is False)
|
||||
prepost = True
|
||||
)
|
||||
|
||||
|
||||
``pandas_datareader`` override
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code:: python
|
||||
|
||||
from pandas_datareader import data as pdr
|
||||
|
@ -60,59 +128,6 @@ Quick Start
|
|||
# download dataframe
|
||||
data = pdr.get_data_yahoo("SPY", start="2017-01-01", end="2017-04-30")
|
||||
|
||||
# download Panel
|
||||
data = pdr.get_data_yahoo(["SPY", "IWM"], start="2017-01-01", end="2017-04-30")
|
||||
|
||||
|
||||
I've also added some options to make life easier :)
|
||||
|
||||
Below is the full list of acceptable parameters:
|
||||
|
||||
.. code:: python
|
||||
|
||||
data = pdr.get_data_yahoo(
|
||||
# tickers list (single tickers accepts a string as well)
|
||||
tickers = ["SPY", "IWM", "..."],
|
||||
|
||||
# start date (YYYY-MM-DD / datetime.datetime object)
|
||||
# (optional, default is 1950-01-01)
|
||||
start = "2017-01-01",
|
||||
|
||||
# end date (YYYY-MM-DD / datetime.datetime object)
|
||||
# (optional, default is today)
|
||||
end = "2017-04-30",
|
||||
|
||||
# return a multi-index dataframe
|
||||
# (optional, default is Panel, which is deprecated)
|
||||
as_panel = False,
|
||||
|
||||
# group by ticker (to access via data['SPY'])
|
||||
# (optional, default is 'column')
|
||||
group_by = 'ticker',
|
||||
|
||||
# adjust all OHLC automatically
|
||||
# (optional, default is False)
|
||||
auto_adjust = True,
|
||||
|
||||
# download dividend + stock splits data
|
||||
# (optional, default is None)
|
||||
# options are:
|
||||
# - True (returns history + actions)
|
||||
# - 'only' (actions only)
|
||||
actions = True,
|
||||
|
||||
# How many threads to use?
|
||||
threads = 10
|
||||
)
|
||||
|
||||
|
||||
It can also be used as a stand-alone library (without ``pandas_datareader``) if you want:
|
||||
|
||||
.. code:: python
|
||||
|
||||
import fix_yahoo_finance as yf
|
||||
data = yf.download("SPY", start="2017-01-01", end="2017-04-30")
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
@ -127,11 +142,10 @@ Install ``fix_yahoo_finance`` using ``pip``:
|
|||
Requirements
|
||||
------------
|
||||
|
||||
* `Python <https://www.python.org>`_ >=3.4
|
||||
* `Pandas <https://github.com/pydata/pandas>`_ (tested to work with >=0.18.1)
|
||||
* `Python <https://www.python.org>`_ >= 2.7, 3.4+
|
||||
* `Pandas <https://github.com/pydata/pandas>`_ (tested to work with >=0.23.1)
|
||||
* `Numpy <http://www.numpy.org>`_ >= 1.11.1
|
||||
* `requests <http://docs.python-requests.org/en/master/>`_ >= 2.14.2
|
||||
* `multitasking <https://github.com/ranaroussi/multitasking>`_ >= 0.0.3
|
||||
|
||||
|
||||
Optional (if you want to use ``pandas_datareader``)
|
||||
|
@ -142,7 +156,7 @@ Optional (if you want to use ``pandas_datareader``)
|
|||
Legal Stuff
|
||||
------------
|
||||
|
||||
**fix-yahoo-finance** is distributed under the **GNU Lesser General Public License v3.0**. See the `LICENSE.txt <./LICENSE.txt>`_ file in the release for details.
|
||||
**fix-yahoo-finance** is distributed under the **Apache Software License**. See the `LICENSE.txt <./LICENSE.txt>`_ file in the release for details.
|
||||
|
||||
|
||||
P.S.
|
||||
|
|
|
@ -1,88 +1,75 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Yahoo! Finance Fix for Pandas Datareader
|
||||
# Yahoo! Finance market data downloader (+fix for Pandas Datareader)
|
||||
# https://github.com/ranaroussi/fix-yahoo-finance
|
||||
#
|
||||
# Copyright 2017-2018 Ran Aroussi
|
||||
# Copyright 2017-2019 Ran Aroussi
|
||||
#
|
||||
# Licensed under the GNU Lesser General Public License, v3.0 (the "License");
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.gnu.org/licenses/lgpl-3.0.en.html
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
__version__ = "0.0.22"
|
||||
__version__ = "0.1.0"
|
||||
__author__ = "Ran Aroussi"
|
||||
__all__ = ['download', 'get_yahoo_crumb', 'parse_ticker_csv', 'pdr_override']
|
||||
__all__ = ['download', 'Ticker', 'pdr_override',
|
||||
'get_yahoo_crumb', 'parse_ticker_csv']
|
||||
|
||||
|
||||
import datetime as _datetime
|
||||
import time as _time
|
||||
import io as _io
|
||||
import re as _re
|
||||
import warnings as _warnings
|
||||
import sys as _sys
|
||||
|
||||
import numpy as _np
|
||||
import pandas as _pd
|
||||
import datetime as _datetime
|
||||
import requests as _requests
|
||||
import multitasking as _multitasking
|
||||
|
||||
|
||||
_YAHOO_COOKIE = ''
|
||||
_YAHOO_CRUMB = ''
|
||||
_YAHOO_CHECKED = None
|
||||
_YAHOO_TTL = 180
|
||||
|
||||
_DFS = {}
|
||||
_COMPLETED = 0
|
||||
_PROGRESS_BAR = False
|
||||
_FAILED = []
|
||||
|
||||
|
||||
def get_yahoo_crumb(force=False):
|
||||
global _YAHOO_COOKIE, _YAHOO_CRUMB, _YAHOO_CHECKED, _YAHOO_TTL
|
||||
|
||||
# use same cookie for 5 min
|
||||
if _YAHOO_CHECKED and not force:
|
||||
now = _datetime.datetime.now()
|
||||
delta = (now - _YAHOO_CHECKED).total_seconds()
|
||||
if delta < _YAHOO_TTL:
|
||||
return (_YAHOO_CRUMB, _YAHOO_COOKIE)
|
||||
|
||||
res = _requests.get('https://finance.yahoo.com/quote/SPY/history')
|
||||
_YAHOO_COOKIE = res.cookies['B']
|
||||
|
||||
pattern = _re.compile('.*"CrumbStore":\{"crumb":"(?P<crumb>[^"]+)"\}')
|
||||
for line in res.text.splitlines():
|
||||
m = pattern.match(line)
|
||||
if m is not None:
|
||||
_YAHOO_CRUMB = m.groupdict()['crumb']
|
||||
|
||||
# set global params
|
||||
_YAHOO_CHECKED = _datetime.datetime.now()
|
||||
|
||||
return (_YAHOO_CRUMB, _YAHOO_COOKIE)
|
||||
import pandas as _pd
|
||||
import numpy as _np
|
||||
import sys as _sys
|
||||
|
||||
|
||||
def parse_ticker_csv(csv_str, auto_adjust):
|
||||
df = _pd.read_csv(csv_str, index_col=0, error_bad_lines=False
|
||||
).replace('null', _np.nan).dropna()
|
||||
raise DeprecationWarning('This method is deprecated')
|
||||
pass
|
||||
|
||||
df.index = _pd.to_datetime(df.index)
|
||||
df = df.apply(_pd.to_numeric)
|
||||
df['Volume'] = df['Volume'].fillna(0).astype(int)
|
||||
|
||||
if auto_adjust:
|
||||
def get_yahoo_crumb(force=False):
    """Deprecated stub kept so existing imports and call sites still resolve.

    :Parameters:
        force : bool
            Ignored; accepted only to keep the previous signature.

    :Raises:
        DeprecationWarning
            Always raised; this function no longer does any work.
    """
    raise DeprecationWarning('This method is deprecated')
|
||||
|
||||
|
||||
class Ticker():
|
||||
|
||||
def __init__(self, ticker):
|
||||
self.ticker = ticker
|
||||
self._history = None
|
||||
self._base_url = 'https://query1.finance.yahoo.com'
|
||||
|
||||
@property
def info(self):
    """Retrieve metadata and current price data for this ticker.

    Requests the ``/v7/finance/quote`` endpoint under ``self._base_url``
    for ``self.ticker`` and returns the first entry of
    ``quoteResponse.result``; an empty dict is returned when that list
    is empty.
    """
    quote_url = "{}/v7/finance/quote?symbols={}".format(
        self._base_url, self.ticker)
    response = _requests.get(url=quote_url)
    results = response.json()["quoteResponse"]["result"]
    return results[0] if len(results) > 0 else {}
|
||||
|
||||
"""
|
||||
# @todo
|
||||
def _options(self):
|
||||
# https://query1.finance.yahoo.com/v7/finance/options/SPY
|
||||
pass
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _auto_adjust(data):
|
||||
df = data.copy()
|
||||
ratio = df["Close"] / df["Adj Close"]
|
||||
df["Adj Open"] = df["Open"] / ratio
|
||||
df["Adj High"] = df["High"] / ratio
|
||||
|
@ -97,278 +84,225 @@ def parse_ticker_csv(csv_str, auto_adjust):
|
|||
"Adj Low": "Low", "Adj Close": "Close"
|
||||
}, inplace=True)
|
||||
|
||||
df = df[['Open', 'High', 'Low', 'Close', 'Volume']]
|
||||
df = df[["Open", "High", "Low", "Close", "Volume"]]
|
||||
return df
|
||||
|
||||
return df.groupby(df.index).first()
|
||||
@staticmethod
|
||||
def _parse_quotes(data):
|
||||
timestamps = data["timestamp"]
|
||||
ohlc = data["indicators"]["quote"][0]
|
||||
volumes = ohlc["volume"]
|
||||
opens = ohlc["open"]
|
||||
closes = ohlc["close"]
|
||||
lows = ohlc["low"]
|
||||
highs = ohlc["high"]
|
||||
|
||||
adjclose = closes
|
||||
if "adjclose" in data["indicators"]:
|
||||
adjclose = data["indicators"]["adjclose"][0]["adjclose"]
|
||||
|
||||
quotes = _pd.DataFrame({"Open": opens,
|
||||
"High": highs,
|
||||
"Low": lows,
|
||||
"Close": closes,
|
||||
"Adj Close": adjclose,
|
||||
"Volume": volumes})
|
||||
|
||||
quotes = _np.round(quotes, data["meta"]["priceHint"])
|
||||
quotes.index = _pd.to_datetime(timestamps, unit="s")
|
||||
quotes.sort_index(inplace=True)
|
||||
return quotes
|
||||
|
||||
@staticmethod
|
||||
def _parse_actions(data):
|
||||
dividends = _pd.DataFrame(columns=["Dividends"])
|
||||
splits = _pd.DataFrame(columns=["Stock Splits"])
|
||||
|
||||
if "events" in data:
|
||||
if "dividends" in data["events"]:
|
||||
dividends = _pd.DataFrame(data["events"]["dividends"].values())
|
||||
dividends.set_index("date", inplace=True)
|
||||
dividends.index = _pd.to_datetime(dividends.index, unit="s")
|
||||
dividends.sort_index(inplace=True)
|
||||
dividends.columns = ["Dividends"]
|
||||
|
||||
if "splits" in data["events"]:
|
||||
splits = _pd.DataFrame(data["events"]["splits"].values())
|
||||
splits.set_index("date", inplace=True)
|
||||
splits.index = _pd.to_datetime(
|
||||
splits.index, unit="s")
|
||||
splits.sort_index(inplace=True)
|
||||
splits["Stock Splits"] = splits["numerator"] / \
|
||||
splits["denominator"]
|
||||
splits = splits["Stock Splits"]
|
||||
|
||||
return dividends, splits
|
||||
|
||||
@property
|
||||
def dividends(self):
|
||||
if self._history is None:
|
||||
self._history = self.history(period="max")
|
||||
dividends = self._history["Dividends"]
|
||||
return dividends[dividends != 0]
|
||||
|
||||
@property
|
||||
def splits(self):
|
||||
if self._history is None:
|
||||
self.history(period="max")
|
||||
splits = self._history["Stock Splits"]
|
||||
return splits[splits != 0]
|
||||
|
||||
@property
|
||||
def actions(self):
|
||||
if self._history is None:
|
||||
self.history(period="max")
|
||||
actions = self._history[["Dividends", "Stock Splits"]]
|
||||
return actions[actions != 0].dropna(how='all').fillna(0)
|
||||
|
||||
def history(self, period="1mo", interval="1d",
            start=None, end=None, prepost=False, auto_adjust=False):
    """
    Download historical quotes, dividends and splits for this ticker.

    :Parameters:
        period : str
            Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
            Either use the period parameter or use start and end.
            When start or end is given, period is ignored.
        interval : str
            Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
            Intraday data cannot extend beyond the last 60 days
        start: str
            Download start date string (YYYY-MM-DD) or _datetime.
            Default is 1900-01-01
        end: str
            Download end date string (YYYY-MM-DD) or _datetime.
            Default is now
        prepost : bool
            Include Pre and Post market data in results?
            Default is False
        auto_adjust: bool
            Adjust all OHLC automatically? Default is False

    :Returns:
        DataFrame of quotes plus "Dividends" and "Stock Splits" columns
        (missing values filled with 0). The index is named "Datetime"
        for minute/hour intervals and "Date" otherwise. The frame is
        also stored on ``self._history``.

    :Raises:
        ValueError
            When the response's ``chart.error`` entry is set.
    """

    def _to_epoch(value):
        # Both datetime objects and YYYY-MM-DD strings go through
        # time.mktime, exactly as before.
        if isinstance(value, _datetime.datetime):
            return int(_time.mktime(value.timetuple()))
        return int(_time.mktime(_time.strptime(str(value), '%Y-%m-%d')))

    # An explicit start/end must win over the default period; previously
    # they were silently dropped unless period was None or "max".
    explicit_dates = start is not None or end is not None
    if explicit_dates or period is None or period == "max":
        params = {
            # -2208988800 is the documented 1900-01-01 default start.
            "period1": -2208988800 if start is None else _to_epoch(start),
            "period2": int(_time.time()) if end is None else _to_epoch(end),
        }
    else:
        params = {"range": period}

    params["interval"] = interval.lower()
    params["includePrePost"] = prepost
    params["events"] = "div,splits"

    url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker)
    data = _requests.get(url=url, params=params).json()

    # Getting data from json
    error = data["chart"]["error"]
    if error:
        raise ValueError(error["description"])

    result = data["chart"]["result"][0]

    # quotes
    quotes = self._parse_quotes(result)
    if auto_adjust:
        quotes = self._auto_adjust(quotes)

    quotes.dropna(inplace=True)

    # actions
    dividends, splits = self._parse_actions(result)

    # combine
    df = _pd.concat([quotes, dividends, splits], axis=1, sort=True)
    # Assign back instead of fillna(inplace=True) on a column selection,
    # which mutates a temporary under pandas copy-on-write.
    df["Dividends"] = df["Dividends"].fillna(0)
    df["Stock Splits"] = df["Stock Splits"].fillna(0)

    # index eod/intraday
    df.index = df.index.tz_localize("UTC").tz_convert(
        result["meta"]["exchangeTimezoneName"])

    # Minute ("...m") and hour ("1h") intervals keep their timestamps;
    # "1mo"/"3mo" end in "o" and are treated as dates.
    if params["interval"][-1] in ("m", "h"):
        df.index.name = "Datetime"
    else:
        df.index = df.index.date
        df.index.name = "Date"

    self._history = df
    return df
|
||||
|
||||
|
||||
def make_chunks(l, n):
    """Generate consecutive slices of ``l``, each holding up to ``n`` items.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    total = len(l)
    for begin in range(0, total, n):
        stop = begin + n
        yield l[begin:stop]
|
||||
|
||||
|
||||
def download(tickers, start=None, end=None, as_panel=False,
|
||||
def download(tickers, start=None, end=None, actions=None, threads=None,
|
||||
group_by='column', auto_adjust=False, progress=True,
|
||||
actions=None, threads=1, **kwargs):
|
||||
period="1mo", interval="1d", prepost=False, **kwargs):
|
||||
"""Download yahoo tickers
|
||||
:Parameters:
|
||||
|
||||
tickers : str, list
|
||||
List of tickers to download
|
||||
period : str
|
||||
Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
|
||||
Either Use period parameter or use start and end
|
||||
interval : str
|
||||
Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
|
||||
Intraday data cannot extend last 60 days
|
||||
start: str
|
||||
Download start date string (YYYY-MM-DD) or _datetime. Default is 1950-01-01
|
||||
Download start date string (YYYY-MM-DD) or _datetime.
|
||||
Default is 1900-01-01
|
||||
end: str
|
||||
Download end date string (YYYY-MM-DD) or _datetime. Default is today
|
||||
as_panel : bool
|
||||
Deprecated
|
||||
Download end date string (YYYY-MM-DD) or _datetime.
|
||||
Default is now
|
||||
group_by : str
|
||||
Group by ticker or 'column' (default)
|
||||
Group by 'ticker' or 'column' (default)
|
||||
prepost : bool
|
||||
Include Pre and Post market data in results?
|
||||
Default is False
|
||||
auto_adjust: bool
|
||||
Adjust all OHLC automatically? Default is False
|
||||
actions: str
|
||||
Download dividend + stock splits data. Default is None (no actions)
|
||||
Options are 'inline' (returns history + actions) and 'only' (actions only)
|
||||
Deprecated: actions are always downloaded
|
||||
threads: int
|
||||
How may threads to use? Default is 1 thread
|
||||
Deprecated
|
||||
"""
|
||||
|
||||
global _DFS, _COMPLETED, _PROGRESS_BAR, _FAILED
|
||||
|
||||
_COMPLETED = 0
|
||||
_FAILED = []
|
||||
|
||||
# format start
|
||||
if start is None:
|
||||
start = int(_time.mktime(_time.strptime('1950-01-01', '%Y-%m-%d')))
|
||||
elif isinstance(start, _datetime.datetime):
|
||||
start = int(_time.mktime(start.timetuple()))
|
||||
else:
|
||||
start = int(_time.mktime(_time.strptime(str(start), '%Y-%m-%d')))
|
||||
|
||||
# format end
|
||||
if end is None:
|
||||
end = int(_time.mktime(_datetime.datetime.now().timetuple()))
|
||||
elif isinstance(end, _datetime.datetime):
|
||||
end = int(_time.mktime(end.timetuple()))
|
||||
else:
|
||||
end = int(_time.mktime(_time.strptime(str(end), '%Y-%m-%d')))
|
||||
|
||||
# create ticker list
|
||||
tickers = tickers if isinstance(tickers, list) else [tickers]
|
||||
tickers = tickers if isinstance(tickers, list) else tickers.split()
|
||||
tickers = [x.upper() for x in tickers]
|
||||
|
||||
# initiate progress bar
|
||||
if progress:
|
||||
_PROGRESS_BAR = _ProgressBar(len(tickers), 'downloaded')
|
||||
|
||||
# download using single thread
|
||||
if threads is None or threads < 2:
|
||||
download_chunk(tickers, start=start, end=end,
|
||||
auto_adjust=auto_adjust, progress=progress,
|
||||
actions=actions, **kwargs)
|
||||
# threaded download
|
||||
else:
|
||||
threads = min([threads, len(tickers)])
|
||||
|
||||
# download in chunks
|
||||
chunks = 0
|
||||
for chunk in make_chunks(tickers, max([1, len(tickers) // threads])):
|
||||
chunks += len(chunk)
|
||||
download_thread(chunk, start=start, end=end,
|
||||
auto_adjust=auto_adjust, progress=progress,
|
||||
actions=actions, **kwargs)
|
||||
if not tickers[-chunks:].empty:
|
||||
download_thread(tickers[-chunks:], start=start, end=end,
|
||||
auto_adjust=auto_adjust, progress=progress,
|
||||
actions=actions, **kwargs)
|
||||
|
||||
# wait for completion
|
||||
while _COMPLETED < len(tickers):
|
||||
_time.sleep(0.1)
|
||||
_DFS = {}
|
||||
for ticker in tickers:
|
||||
data = Ticker(ticker).history(period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
auto_adjust=auto_adjust)
|
||||
_DFS[ticker] = data
|
||||
if progress:
|
||||
_PROGRESS_BAR.animate()
|
||||
|
||||
if progress:
|
||||
_PROGRESS_BAR.completed()
|
||||
|
||||
# create multiIndex df
|
||||
data = _pd.concat(_DFS.values(), axis=1, keys=_DFS.keys())
|
||||
if group_by == 'column':
|
||||
data.columns = data.columns.swaplevel(0, 1)
|
||||
data.sort_index(level=0, axis=1, inplace=True)
|
||||
if auto_adjust:
|
||||
data = data[['Open', 'High', 'Low', 'Close', 'Volume']]
|
||||
else:
|
||||
data = data[['Open', 'High', 'Low',
|
||||
'Close', 'Adj Close', 'Volume']]
|
||||
|
||||
# return single df if only one ticker
|
||||
if len(tickers) == 1:
|
||||
data = _DFS[tickers[0]]
|
||||
|
||||
if _FAILED:
|
||||
print("\nThe following tickers failed to download:\n",
|
||||
', '.join(_FAILED))
|
||||
|
||||
_DFS = {}
|
||||
return data
|
||||
|
||||
|
||||
def download_one(ticker, start, end, interval, auto_adjust=None, actions=None):
|
||||
|
||||
tried_once = False
|
||||
crumb, cookie = get_yahoo_crumb()
|
||||
|
||||
url_str = "https://query1.finance.yahoo.com/v7/finance/download/%s"
|
||||
url_str += "?period1=%s&period2=%s&interval=%s&events=%s&crumb=%s"
|
||||
|
||||
actions = None if '^' in ticker else actions
|
||||
|
||||
if actions:
|
||||
url = url_str % (ticker, start, end, interval, 'div', crumb)
|
||||
res = _requests.get(url, cookies={'B': cookie}).text
|
||||
# print(res)
|
||||
div = _pd.DataFrame(columns=['action', 'value'])
|
||||
if "error" not in res:
|
||||
div = _pd.read_csv(_io.StringIO(res),
|
||||
index_col=0, error_bad_lines=False
|
||||
).replace('null', _np.nan).dropna()
|
||||
|
||||
if isinstance(div, _pd.DataFrame):
|
||||
div.index = _pd.to_datetime(div.index)
|
||||
div["action"] = "DIVIDEND"
|
||||
div = div.rename(columns={'Dividends': 'value'})
|
||||
div['value'] = div['value'].astype(float)
|
||||
|
||||
# download Stock Splits data
|
||||
url = url_str % (ticker, start, end, interval, 'split', crumb)
|
||||
res = _requests.get(url, cookies={'B': cookie}).text
|
||||
split = _pd.DataFrame(columns=['action', 'value'])
|
||||
if "error" not in res:
|
||||
split = _pd.read_csv(_io.StringIO(res),
|
||||
index_col=0, error_bad_lines=False
|
||||
).replace('null', _np.nan).dropna()
|
||||
|
||||
if isinstance(split, _pd.DataFrame):
|
||||
split.index = _pd.to_datetime(split.index)
|
||||
split["action"] = "SPLIT"
|
||||
split = split.rename(columns={'Stock Splits': 'value'})
|
||||
if not split.empty:
|
||||
split['value'] = split.apply(
|
||||
lambda x: 1 / eval(x['value']), axis=1).astype(float)
|
||||
|
||||
if actions == 'only':
|
||||
return _pd.concat([div, split]).sort_index()
|
||||
|
||||
# download history
|
||||
url = url_str % (ticker, start, end, interval, 'history', crumb)
|
||||
res = _requests.get(url, cookies={'B': cookie}).text
|
||||
hist = _pd.DataFrame(
|
||||
columns=['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'])
|
||||
|
||||
if "error" in res:
|
||||
return _pd.DataFrame()
|
||||
|
||||
hist = parse_ticker_csv(_io.StringIO(res), auto_adjust)
|
||||
|
||||
if not hist.empty:
|
||||
if actions is None:
|
||||
return hist
|
||||
|
||||
hist['Dividends'] = div['value'] if not div.empty else _np.nan
|
||||
hist['Dividends'].fillna(0, inplace=True)
|
||||
hist['Stock Splits'] = split['value'] if not split.empty else _np.nan
|
||||
hist['Stock Splits'].fillna(1, inplace=True)
|
||||
|
||||
return hist
|
||||
|
||||
# empty len(hist.index) == 0
|
||||
if not tried_once:
|
||||
tried_once = True
|
||||
get_yahoo_crumb(force=True)
|
||||
return download_one(ticker, start, end, interval, auto_adjust, actions)
|
||||
|
||||
|
||||
@_multitasking.task
|
||||
def download_thread(tickers, start=None, end=None,
|
||||
auto_adjust=False, progress=True,
|
||||
actions=False, **kwargs):
|
||||
download_chunk(tickers, start=start, end=end,
|
||||
auto_adjust=auto_adjust, progress=progress,
|
||||
actions=actions, **kwargs)
|
||||
|
||||
|
||||
def download_chunk(tickers, start=None, end=None,
|
||||
auto_adjust=False, progress=True,
|
||||
actions=False, **kwargs):
|
||||
|
||||
global _DFS, _COMPLETED, _PROGRESS_BAR, _FAILED
|
||||
|
||||
interval = kwargs["interval"] if "interval" in kwargs else "1d"
|
||||
|
||||
# url template
|
||||
url_str = "https://query1.finance.yahoo.com/v7/finance/download/%s"
|
||||
url_str += "?period1=%s&period2=%s&interval=%s&events=%s&crumb=%s"
|
||||
|
||||
# failed tickers collectors
|
||||
round1_failed_tickers = []
|
||||
|
||||
# start downloading
|
||||
for ticker in tickers:
|
||||
|
||||
# yahoo crumb/cookie
|
||||
# crumb, cookie = get_yahoo_crumb()
|
||||
get_yahoo_crumb()
|
||||
|
||||
tried_once = False
|
||||
try:
|
||||
hist = download_one(ticker, start, end,
|
||||
interval, auto_adjust, actions)
|
||||
if isinstance(hist, _pd.DataFrame):
|
||||
_DFS[ticker] = hist
|
||||
if progress:
|
||||
_PROGRESS_BAR.animate()
|
||||
else:
|
||||
round1_failed_tickers.append(ticker)
|
||||
except:
|
||||
# something went wrong...
|
||||
# try one more time using a new cookie/crumb
|
||||
if not tried_once:
|
||||
tried_once = True
|
||||
try:
|
||||
get_yahoo_crumb(force=True)
|
||||
hist = download_one(ticker, start, end,
|
||||
interval, auto_adjust, actions)
|
||||
if isinstance(hist, _pd.DataFrame):
|
||||
_DFS[ticker] = hist
|
||||
if progress:
|
||||
_PROGRESS_BAR.animate()
|
||||
else:
|
||||
round1_failed_tickers.append(ticker)
|
||||
except:
|
||||
round1_failed_tickers.append(ticker)
|
||||
_time.sleep(0.001)
|
||||
|
||||
# try failed items again before giving up
|
||||
_COMPLETED += len(tickers) - len(round1_failed_tickers)
|
||||
|
||||
if round1_failed_tickers:
|
||||
get_yahoo_crumb(force=True)
|
||||
for ticker in round1_failed_tickers:
|
||||
try:
|
||||
hist = download_one(ticker, start, end,
|
||||
interval, auto_adjust, actions)
|
||||
if isinstance(hist, _pd.DataFrame):
|
||||
_DFS[ticker] = hist
|
||||
if progress:
|
||||
_PROGRESS_BAR.animate()
|
||||
else:
|
||||
_FAILED.append(ticker)
|
||||
except:
|
||||
_FAILED.append(ticker)
|
||||
_time.sleep(0.000001)
|
||||
_COMPLETED += 1
|
||||
|
||||
|
||||
class _ProgressBar:
|
||||
def __init__(self, iterations, text='completed'):
|
||||
self.text = text
|
||||
|
@ -425,5 +359,6 @@ def pdr_override():
|
|||
try:
|
||||
import pandas_datareader
|
||||
pandas_datareader.data.get_data_yahoo = download
|
||||
except:
|
||||
pandas_datareader.data.get_data_yahoo_actions = download
|
||||
except Exception:
|
||||
pass
|
||||
|
|
Loading…
Reference in New Issue