Merge pull request #1660 from ranaroussi/fix/price-repair-100x-and-calibration

Fix price repair: 100x & calibration
dev
ValueRaider 2023-08-13 12:21:55 +01:00 committed by GitHub
commit 677f3d5702
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 127 additions and 99 deletions

View File

@ -1,24 +1,24 @@
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
2022-06-06 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-06-01 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-31 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-30 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-27 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-26 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-25 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-24 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-23 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-20 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-19 00:00:00+01:00,14.55,14.55,14.55,14.55,14.55,0,0,0
2022-05-18 00:00:00+01:00,14.55,14.55,14.55,14.55,14.55,532454,0,0
2022-05-17 00:00:00+01:00,14.55,14.55,14.55,14.55,14.55,0,0,0
2022-05-16 00:00:00+01:00,14.55,14.55,14.55,14.55,14.55,0,0,0
2022-05-13 00:00:00+01:00,14.55,14.55,14.55,14.55,14.55,0,0,0
2022-05-12 00:00:00+01:00,14.55,14.55,14.55,14.55,14.55,0,0,0
2022-05-11 00:00:00+01:00,14.55,14.55,14.55,14.55,14.55,0,0,0
2022-05-10 00:00:00+01:00,14.55,14.55,14.55,14.55,14.55,0,0,0
2022-05-09 00:00:00+01:00,14.55,14.55,14.55,14.55,14.55,0,0,0
2022-05-06 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-05 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-04 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-05-03 00:00:00+01:00,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,14.5500004291534,0,0,0
2022-06-06 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-06-01 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-31 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-30 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-27 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-26 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-25 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-24 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-23 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-20 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-19 00:00:00+01:00,0.1455,0.1455,0.1455,0.1455,0.1455,0,0,0
2022-05-18 00:00:00+01:00,0.1455,0.1455,0.1455,0.1455,0.1455,532454,0,0
2022-05-17 00:00:00+01:00,0.1455,0.1455,0.1455,0.1455,0.1455,0,0,0
2022-05-16 00:00:00+01:00,0.1455,0.1455,0.1455,0.1455,0.1455,0,0,0
2022-05-13 00:00:00+01:00,0.1455,0.1455,0.1455,0.1455,0.1455,0,0,0
2022-05-12 00:00:00+01:00,0.1455,0.1455,0.1455,0.1455,0.1455,0,0,0
2022-05-11 00:00:00+01:00,0.1455,0.1455,0.1455,0.1455,0.1455,0,0,0
2022-05-10 00:00:00+01:00,0.1455,0.1455,0.1455,0.1455,0.1455,0,0,0
2022-05-09 00:00:00+01:00,0.1455,0.1455,0.1455,0.1455,0.1455,0,0,0
2022-05-06 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-05 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-04 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0
2022-05-03 00:00:00+01:00,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0.145500004291534,0,0,0

1 Date Open High Low Close Adj Close Volume Dividends Stock Splits
2 2022-06-06 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
3 2022-06-01 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
4 2022-05-31 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
5 2022-05-30 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
6 2022-05-27 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
7 2022-05-26 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
8 2022-05-25 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
9 2022-05-24 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
10 2022-05-23 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
11 2022-05-20 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
12 2022-05-19 00:00:00+01:00 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 0 0 0
13 2022-05-18 00:00:00+01:00 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 532454 0 0
14 2022-05-17 00:00:00+01:00 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 0 0 0
15 2022-05-16 00:00:00+01:00 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 0 0 0
16 2022-05-13 00:00:00+01:00 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 0 0 0
17 2022-05-12 00:00:00+01:00 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 0 0 0
18 2022-05-11 00:00:00+01:00 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 0 0 0
19 2022-05-10 00:00:00+01:00 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 0 0 0
20 2022-05-09 00:00:00+01:00 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 14.55 0.1455 0 0 0
21 2022-05-06 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
22 2022-05-05 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
23 2022-05-04 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0
24 2022-05-03 00:00:00+01:00 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 14.5500004291534 0.145500004291534 0 0 0

View File

@ -496,8 +496,8 @@ class TickerBase:
# Limit max reconstruction depth to 2:
if self._reconstruct_start_interval is None:
self._reconstruct_start_interval = interval
if interval != self._reconstruct_start_interval:
logger.debug(f"{self.ticker}: Price repair has hit max depth of 1 ('%s'->'%s')", self._reconstruct_start_interval, interval)
if interval != self._reconstruct_start_interval and interval != nexts[self._reconstruct_start_interval]:
logger.debug(f"{self.ticker}: Price repair has hit max depth of 2 ('%s'->'%s'->'%s')", self._reconstruct_start_interval, nexts[self._reconstruct_start_interval], interval)
return df
df = df.sort_index()
@ -760,7 +760,12 @@ class TickerBase:
weights = weights[:, None] # transpose
weights = np.tile(weights, len(calib_cols)) # 1D -> 2D
weights = weights[calib_filter] # flatten
ratio = np.average(ratios, weights=weights)
not1 = ~np.isclose(ratios, 1.0, rtol=0.00001)
if np.sum(not1) == len(calib_cols):
# Only 1 calibration row in df_new is different to df_block so ignore
ratio = 1.0
else:
ratio = np.average(ratios, weights=weights)
logger.debug(f"Price calibration ratio (raw) = {ratio:6f}")
ratio_rcp = round(1.0 / ratio, 1)
ratio = round(ratio, 1)
@ -1235,19 +1240,26 @@ class TickerBase:
# 100x errors into suspended intervals. Clue is no price change and 0 volume.
# Better to use last active trading interval as baseline.
f_no_activity = (df2['Low'] == df2['High']) & (df2['Volume']==0)
f_no_activity = f_no_activity | df2[OHLC].isna().all(axis=1)
appears_suspended = f_no_activity.any() and np.where(f_no_activity)[0][0]==0
f_active = ~f_no_activity
idx_latest_active = np.where(f_active & np.roll(f_active, 1))[0]
if len(idx_latest_active) == 0:
idx_latest_active = None
else:
idx_latest_active = idx_latest_active[0]
logger.debug(f'price-repair-split: appears_suspended={appears_suspended}, idx_latest_active={idx_latest_active}')
idx_latest_active = int(idx_latest_active[0])
log_msg = f'price-repair-split: appears_suspended={appears_suspended}, idx_latest_active={idx_latest_active}'
if idx_latest_active is not None:
log_msg += f' ({df.index[idx_latest_active].date()})'
logger.debug(log_msg)
if logger.isEnabledFor(logging.DEBUG):
df_debug = df2.copy()
df_debug = df_debug.drop(['Adj Close', 'Low', 'High', 'Volume', 'Dividends', 'Repaired?'], axis=1, errors='ignore')
df_debug = df_debug.drop(['Adj Close', 'Volume', 'Dividends', 'Repaired?'], axis=1, errors='ignore')
debug_cols = ['Low', 'High']
df_debug = df_debug.drop([c for c in OHLC if c not in debug_cols], axis=1, errors='ignore')
else:
debug_cols = []
# Calculate daily price % change. To reduce effect of price volatility,
# calculate change for each OHLC column.
@ -1359,87 +1371,44 @@ class TickerBase:
return df
# if logger.isEnabledFor(logging.DEBUG):
# logger.debug(f"price-repair-split: my workings:")
# logger.debug('\n' + str(df_debug))
# df_debug['i'] = list(range(0, df_debug.shape[0]))
# df_debug['i_rev'] = df_debug.shape[0]-1 - df_debug['i']
# with pd.option_context('display.max_rows', None, 'display.max_columns', 10, 'display.width', 1000): # more options can be specified also
# logger.debug(f"price-repair-split: my workings:" + '\n' + str(df_debug))
def map_signals_to_ranges(f, f_up, f_down):
# Ensure 0th element is False, because True is nonsense
if f[0]:
f = np.copy(f) ; f[0] = False
f_up = np.copy(f_up) ; f_up[0] = False
f_down = np.copy(f_down) ; f_down[0] = False
if not f.any():
return []
true_indices = np.where(f)[0]
ranges = []
idx_first_f = np.where(f)[0][0]
logger.debug(f'idx_latest_active={idx_latest_active} idx_first_f={idx_first_f}')
if appears_suspended and (idx_latest_active is None or idx_latest_active >= idx_first_f):
# baseline = 2nd index, because no active trading since latest split error
for i in range(len(true_indices) - 1):
if i % 2 == 0:
if split > 1.0:
adj = 'split' if f_down[true_indices[i]] else '1.0/split'
else:
adj = '1.0/split' if f_down[true_indices[i]] else 'split'
ranges.append((true_indices[i], true_indices[i + 1], adj))
# First, process prices older than idx_latest_active:
if idx_latest_active is None:
true_indices_old = []
if len(true_indices) % 2 != 0:
if split > 1.0:
adj = 'split' if f_down[true_indices[-1]] else '1.0/split'
else:
true_indices_old = [i for i in true_indices if i > idx_latest_active]
if len(true_indices_old) > 0:
for i in range(len(true_indices_old) - 1):
if i % 2 == 0:
if split > 1.0:
adj = 'split' if f_down[true_indices_old[i]] else '1.0/split'
else:
adj = '1.0/split' if f_down[true_indices_old[i]] else 'split'
ranges.append((true_indices_old[i], true_indices_old[i+1], adj))
if len(true_indices_old) % 2 != 0:
if split > 1.0:
adj = 'split' if f_down[true_indices_old[-1]] else '1.0/split'
else:
adj = '1.0/split' if f_down[true_indices_old[-1]] else 'split'
ranges.append((true_indices_old[-1], len(f), adj))
# Next, process prices more recent than idx_latest_active:
true_indices_recent = [i for i in true_indices if i not in true_indices_old]
if len(true_indices_recent) > 0:
if split > 1.0:
adj = 'split' if f_up[true_indices_recent[0]] else '1.0/split'
else:
adj = '1.0/split' if f_up[true_indices_recent[0]] else 'split'
ranges.append((0, true_indices_recent[0], adj))
for i in range(1, len(true_indices_recent) - 1):
if i % 2 == 1:
if split > 1.0:
adj = '1.0/split' if f_up[true_indices_recent[i]] else 'split'
else:
adj = 'split' if f_up[true_indices_recent[i]] else '1.0/split'
ranges.append((true_indices_recent[i], true_indices_recent[i + 1], adj))
if len(true_indices_recent) % 2 == 0:
if split > 1.0:
adj = 'split' if f_down[true_indices_recent[-1]] else '1.0/split'
else:
adj = '1.0/split' if f_down[true_indices_recent[-1]] else 'split'
ranges.append((true_indices_recent[-1], len(f), adj))
ranges = sorted(ranges, key=lambda x: x[0])
else:
# baseline = 2nd index
for i in range(len(true_indices) - 1):
if i % 2 == 0:
if split > 1.0:
adj = 'split' if f_down[true_indices[i]] else '1.0/split'
else:
adj = '1.0/split' if f_down[true_indices[i]] else 'split'
ranges.append((true_indices[i], true_indices[i + 1], adj))
if len(true_indices) % 2 != 0:
if split > 1.0:
adj = 'split' if f_down[true_indices[-1]] else '1.0/split'
else:
adj = '1.0/split' if f_down[true_indices[-1]] else 'split'
ranges.append((true_indices[-1], len(f), adj))
adj = '1.0/split' if f_down[true_indices[-1]] else 'split'
ranges.append((true_indices[-1], len(f), adj))
return ranges
if idx_latest_active is not None:
idx_rev_latest_active = df.shape[0] - 1 - idx_latest_active
logger.debug(f'price-repair-split: idx_latest_active={idx_latest_active}, idx_rev_latest_active={idx_rev_latest_active}')
if correct_columns_individually:
f_corrected = np.full(n, False)
if correct_volume:
@ -1455,7 +1424,38 @@ class TickerBase:
OHLC_correct_ranges = [None, None, None, None]
for j in range(len(OHLC)):
c = OHLC[j]
ranges = map_signals_to_ranges(f[:, j], f_up[:, j], f_down[:, j])
idx_first_f = np.where(f)[0][0]
if appears_suspended and (idx_latest_active is not None and idx_latest_active >= idx_first_f):
# Suspended midway during data date range.
# 1: process data before suspension in index-ascending (date-descending) order.
# 2: process data after suspension in index-descending order. Requires signals to be reversed,
# then returned ranges to also be reversed, because this logic was originally written for
# index-ascending (date-descending) order.
fj = f[:, j]
f_upj = f_up[:, j]
f_downj = f_down[:, j]
ranges_before = map_signals_to_ranges(fj[idx_latest_active:], f_upj[idx_latest_active:], f_downj[idx_latest_active:])
if len(ranges_before) > 0:
# Shift each range back to global indexing
for i in range(len(ranges_before)):
r = ranges_before[i]
ranges_before[i] = (r[0] + idx_latest_active, r[1] + idx_latest_active, r[2])
f_rev_downj = np.flip(np.roll(f_upj, -1)) # correct
f_rev_upj = np.flip(np.roll(f_downj, -1)) # correct
f_revj = f_rev_upj | f_rev_downj
ranges_after = map_signals_to_ranges(f_revj[idx_rev_latest_active:], f_rev_upj[idx_rev_latest_active:], f_rev_downj[idx_rev_latest_active:])
if len(ranges_after) > 0:
# Shift each range back to global indexing:
for i in range(len(ranges_after)):
r = ranges_after[i]
ranges_after[i] = (r[0] + idx_rev_latest_active, r[1] + idx_rev_latest_active, r[2])
# Flip range to normal ordering
for i in range(len(ranges_after)):
r = ranges_after[i]
ranges_after[i] = (n-r[1], n-r[0], r[2])
ranges = ranges_before ; ranges.extend(ranges_after)
else:
ranges = map_signals_to_ranges(f[:, j], f_up[:, j], f_down[:, j])
logger.debug(f"column '{c}' ranges: {ranges}")
if start_min is not None:
# Prune ranges that are older than start_min
@ -1514,7 +1514,35 @@ class TickerBase:
df2.loc[f_corrected, 'Repaired?'] = True
else:
ranges = map_signals_to_ranges(f, f_up, f_down)
idx_first_f = np.where(f)[0][0]
if appears_suspended and (idx_latest_active is not None and idx_latest_active >= idx_first_f):
# Suspended midway during data date range.
# 1: process data before suspension in index-ascending (date-descending) order.
# 2: process data after suspension in index-descending order. Requires signals to be reversed,
# then returned ranges to also be reversed, because this logic was originally written for
# index-ascending (date-descending) order.
ranges_before = map_signals_to_ranges(f[idx_latest_active:], f_up[idx_latest_active:], f_down[idx_latest_active:])
if len(ranges_before) > 0:
# Shift each range back to global indexing
for i in range(len(ranges_before)):
r = ranges_before[i]
ranges_before[i] = (r[0] + idx_latest_active, r[1] + idx_latest_active, r[2])
f_rev_down = np.flip(np.roll(f_up, -1))
f_rev_up = np.flip(np.roll(f_down, -1))
f_rev = f_rev_up | f_rev_down
ranges_after = map_signals_to_ranges(f_rev[idx_rev_latest_active:], f_rev_up[idx_rev_latest_active:], f_rev_down[idx_rev_latest_active:])
if len(ranges_after) > 0:
# Shift each range back to global indexing:
for i in range(len(ranges_after)):
r = ranges_after[i]
ranges_after[i] = (r[0] + idx_rev_latest_active, r[1] + idx_rev_latest_active, r[2])
# Flip range to normal ordering
for i in range(len(ranges_after)):
r = ranges_after[i]
ranges_after[i] = (n-r[1], n-r[0], r[2])
ranges = ranges_before ; ranges.extend(ranges_after)
else:
ranges = map_signals_to_ranges(f, f_up, f_down)
if start_min is not None:
# Prune ranges that are older than start_min
for i in range(len(ranges)-1, -1, -1):