# pip install pandas numpy matplotlib pyyaml
import pandas as pd
import numpy as np
folder = r'M:\DEV\github__a_moscatelli\repositories\home\am-wiki-assets\equityaccumulator'+'\\'
hist_csv_filename = folder + 'JPM.csv'
summary = {}
verbose = False
charting = True
functions
def load_past(hist_csv_filename):
    histcolname = 'Date'
    yh = pd.read_csv(hist_csv_filename, index_col=histcolname)
    # the Date index is deliberately kept as ISO '%Y-%m-%d' strings:
    # lexicographic order on such strings matches chronological order,
    # which the contract date comparisons below rely on
    yh = yh.rename(columns={'Adj Close': 'AdjClose'})
    return yh
def print_summary():
    for kk, vv in summary.items():
        print(kk, '=>', vv)
execution
yhz=load_past(hist_csv_filename)
yhz
| Date | Open | High | Low | Close | AdjClose | Volume |
|---|---|---|---|---|---|---|
| 2022-05-31 | 130.179993 | 133.149994 | 129.710007 | 132.229996 | 127.951584 | 17015000 |
| 2022-06-01 | 132.869995 | 132.869995 | 128.669998 | 129.910004 | 125.706657 | 10768000 |
| 2022-06-02 | 129.899994 | 132.009995 | 128.669998 | 132.000000 | 127.729042 | 8374800 |
| 2022-06-03 | 131.210007 | 131.770004 | 129.990005 | 130.160004 | 125.948578 | 8596000 |
| 2022-06-06 | 131.089996 | 132.389999 | 129.460007 | 129.729996 | 125.532494 | 11323500 |
| ... | ... | ... | ... | ... | ... | ... |
| 2023-05-22 | 141.000000 | 141.639999 | 137.660004 | 138.029999 | 138.029999 | 11402500 |
| 2023-05-23 | 137.539993 | 139.029999 | 136.460007 | 136.589996 | 136.589996 | 9514900 |
| 2023-05-24 | 135.399994 | 136.350006 | 135.080002 | 135.339996 | 135.339996 | 8578000 |
| 2023-05-25 | 134.820007 | 136.270004 | 134.580002 | 135.669998 | 135.669998 | 8472500 |
| 2023-05-26 | 136.110001 | 137.600006 | 135.630005 | 136.940002 | 136.940002 | 8576700 |
250 rows × 6 columns
summary['hist_filename']=hist_csv_filename
summary['hist_shape']=yhz.shape
summary['hist_firstdt']=yhz.index[0]
summary['hist_lastdt']=yhz.index[-1]
functions
def add_columns(df, colname):
    df['LagAdjClose'] = df[colname].shift(1)
    df['DailyLogRet'] = np.log(df[colname] / df['LagAdjClose'])
    return df
def getstats(df, colname):
    sigma = df[colname].std()
    mu = df[colname].mean()
    cnt = df[colname].count()
    return {'sigma': sigma, 'mu': mu, 'dretCount': cnt}
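add_columns derives the one-day log return as ln(P_t / P_(t-1)); getstats then reports its sample mean, standard deviation and count. As a quick worked check against the first two adjusted closes in the table above:
np.log(125.706657 / 127.951584)  # ≈ -0.0177, the log return from 2022-05-31 to 2022-06-01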
execution
yh=add_columns(yhz,'AdjClose')
yh.DailyLogRet.describe()
count    249.000000
mean       0.000273
std        0.017309
min       -0.055657
25%       -0.010019
50%        0.000264
75%        0.009317
max        0.072795
Name: DailyLogRet, dtype: float64
yhs = getstats(yh,'DailyLogRet')
summary['hist_stats'] = yhs
yhs
{'sigma': 0.017308744241388962, 'mu': 0.0002726544013365923, 'dretCount': 249}
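These are per-trading-day statistics. With roughly 250 trading days in the one-year history they can be annualized as a sanity check (a minimal sketch, not used by the pricer below):
ann_vol = yhs['sigma'] * np.sqrt(250)  # ≈ 0.274, about 27% annualized volatility
ann_drift = yhs['mu'] * 250            # ≈ 0.068, about 7% annualized drift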
functions
import matplotlib.pyplot as plt
#from scipy.stats import norm
import math
from datetime import datetime
# source: https://en.wikipedia.org/wiki/Geometric_Brownian_motion#Simulating_sample_paths
def get_futpath(drift, vol, n, lastclose):
    dt = 0.1  # step size, as in the Wikipedia sample; note that mu and sigma are
              # per-trading-day estimates, so dt=1 would reproduce the historical
              # daily scale, while dt=0.1 dampens drift and volatility per step
    x0 = lastclose
    mu = drift
    sigma = vol
    sz = 1  # one path per call
x = np.exp(
(mu - sigma ** 2 / 2) * dt
+
sigma * np.random.normal(0, np.sqrt(dt), size=(sz, n)).T
)
x = np.vstack([np.ones(sz), x])
x = x0 * x.cumprod(axis=0)
    rp = x[:, 0]
    df = pd.DataFrame({'AdjClose': rp})
    df = add_columns(df, 'AdjClose')
ss=getstats(df,'DailyLogRet')
if verbose: print('path stats:',ss)
delete_the_first = True # the first is the last of the provided historical prices
xfut = rp
assert len(xfut) == n+1
if delete_the_first:
xfut = np.delete(rp, 0) # xfut[0] is going to be = xpast[-1], hence this removal.
assert len(xfut) == n
return xfut # rp
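The update implemented above is the exact GBM step from the cited article: S(t+dt) = S(t) * exp((mu - sigma^2/2)*dt + sigma*sqrt(dt)*Z) with Z ~ N(0,1), so simulated prices stay positive and each step's log return is normal with mean (mu - sigma^2/2)*dt and standard deviation sigma*sqrt(dt).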
execution
random_seed = 1
np.random.seed(random_seed)
rpath = []
fullpath = []
noPaths = 252  # number of Monte Carlo paths
futprices = 252  # simulated future prices per path, about one trading year
summary['paths'] = noPaths
summary['hist_lastclose'] = yh.AdjClose.iloc[-1]
print('hist profile:',yhs)
lastclose = yh.AdjClose.iloc[-1]
summary['futprices'] = futprices
summary['exec_start_rand'] = datetime.now()
for ipath in range(noPaths):
#print('ipath:',ipath)
xpath=get_futpath(yhs['mu'],yhs['sigma'],futprices,lastclose)
rpath.append(xpath)
hist profile: {'sigma': 0.017308744241388962, 'mu': 0.0002726544013365923, 'dretCount': 249}
xpast = yh.AdjClose.values # = yh.AdjClose.to_numpy()
# building the hist+fut paths = the inputs of the pricer
for ipath in range(noPaths):
xfut = rpath[ipath]
full = np.concatenate([xpast,xfut])
fullpath.append(full)
if charting:
for ipath in range(noPaths):
plt.plot(fullpath[ipath])
plt.title("Realizations of Geometric Brownian Motion")
#plt.show()
import yaml # pip install pyyaml
from datetime import datetime, timedelta
functions
def load_contract_spec(contract):
with open(contract, 'r') as file:
contract_spec = yaml.safe_load(file)
    if False:
        # deliberately disabled: dates stay as ISO strings so they can be compared
        # directly against the string Date index of the price history
        contract_spec['dates']['startdate'] = datetime.strptime(contract_spec['dates']['startdate'], contract_spec['dates']['dateformat'])
        contract_spec['dates']['enddate'] = datetime.strptime(contract_spec['dates']['enddate'], contract_spec['dates']['dateformat'])
        del contract_spec['dates']['dateformat']
return contract_spec
execution
contract = 'contract.yml'
contract_spec = load_contract_spec(folder + contract)
summary['contract_specs']='contract.yml'
summary['exec_start_simul'] = datetime.now()
import math
import statistics
functions
class Hdf:
df = None
    def _dbg(self, trace):
        print('count of dates after', '<' + trace + '>', ':', self.df.shape[0])
def load_df(self,df):
self.df = df
    def load_csv(self, hist_csv_filename, histcolname, filedateformat):
        # note: date_parser is deprecated in pandas 2.x (date_format replaces it)
        self.df = pd.read_csv(
            hist_csv_filename,
            parse_dates=[histcolname],
            date_parser=lambda x: datetime.strptime(x, filedateformat),
            index_col=histcolname)
        self._dbg('init')
        # https://www.dataquest.io/blog/tutorial-time-series-analysis-with-pandas/ follows the same steps
        if False: print(self.df.dtypes)
def withSignal(self,colname,lambdasignal,activation):
        assert False  # disabled: this method is not used by the notebook
assert activation in ['up','down']
self.df[colname] = self.df.apply(lambda row: lambdasignal(row), axis=1).copy()
fdf = self.df[self.df[colname]]
        if fdf.shape[0] > 0:  # was >= 0, which is always true
activation_index = fdf.index[0]
print('activation_index',activation_index)
if activation=='up':
self.df[colname] = self.df.apply(lambda row: row.name >= activation_index, axis=1).copy()
if activation=='down':
self.df[colname] = self.df.apply(lambda row: row.name <= activation_index, axis=1).copy()
self._dbg(colname)
return self
def hist(field, date, dayoffset, hdf):
    # historical data lookup relative to date T; dayoffset <= 0 looks back in time
    assert dayoffset <= 0
    # shift(-dayoffset) moves past values forward onto row 'date':
    # dayoffset=-1 reads the previous trading day's value
    ret = hdf.df.shift(-dayoffset).at[date, field]
    if pd.isna(ret): ret = 0  # the earliest row(s) have no history after the shift
    return ret
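# illustration of the shift-based lookup, with hypothetical values: if the df
# holds AdjClose 100 on '2023-01-02' and 101 on '2023-01-03', then
# hist('AdjClose','2023-01-03',-1,hdf) returns 100 (the prior close) and
# hist('AdjClose','2023-01-03',0,hdf) returns 101 (the close on T itself)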
def getnextdt(lastdt,considerPublicHolidays, useStrings):
assert not considerPublicHolidays
if useStrings:
lastdt=datetime.strptime(lastdt,"%Y-%m-%d")
dow_eur=lastdt.strftime("%a")
#print(lastdt,dow_eur)
offset=1
if dow_eur=='Fri': offset=3
if dow_eur=='Sat': offset=2
lastdt = lastdt + timedelta(days=offset)
if useStrings:
return datetime.strftime(lastdt,"%Y-%m-%d")
else:
return lastdt
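# example: the history above ends on Friday 2023-05-26, so
# getnextdt('2023-05-26', False, True) returns '2023-05-29', the following
# Monday (a US market holiday in 2023; public holidays are knowingly ignored,
# hence the assert on considerPublicHolidays)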
def get_future_dates(lastdate, path):
    # one future weekday date for each simulated price on the given path
    futdt = []
    curdt = lastdate
    for _ in rpath[path]:
        curdt = getnextdt(curdt, False, True)
        futdt.append(curdt)
    assert len(futdt) == len(rpath[path])
    return futdt
def compute1_noncumul(hdf,contract_spec):
#print(contract_spec['dates']['startdate'], contract_spec['dates']['enddate'])
hdf.df['live'] = hdf.df.apply(lambda row: contract_spec['dates']['startdate'] <= row.name <= contract_spec['dates']['enddate'], axis=1)
hdf.df['KI'] = hdf.df.apply(lambda row: row.live and eval(contract_spec['knock-in'],{'row':row}), axis=1).cummax()
    # cummax() acts as a latch: the column is True from the first date the condition is met, and stays True on every later date
hdf.df['KO'] = hdf.df.apply(lambda row: row.KI and eval(contract_spec['knock-out'],{'row':row}), axis=1).cummax()
hdf.df['accumulation'] = hdf.df['KI'] & ~hdf.df['KO']
    # bitwise AND/NOT: accumulation is active inside the KI zone and outside the KO zone
return hdf
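# illustration only: the cummax() latch on a toy series
# pd.Series([False, False, True, False, True]).cummax() returns
# False, False, True, True, True - once met, the flag never resets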
def compute2_cumul(hdf,contract_spec):
hdf.df.insert(0, 'SN', range(0, 0 + len(hdf.df)))
hdf.df['BQty'] = hdf.df.apply(lambda row: row.accumulation and eval(contract_spec['buy']['qty'], {'row':row, 'math':math,'hist':hist,'H':hdf,'T':row.name,'BQ':BQ,'SQ':SQ}), axis=1)
hdf.df['BAt'] = hdf.df.apply(lambda row: row.accumulation and eval(contract_spec['buy']['at'], {'row':row, 'math':math,'hist':hist,'H':hdf,'T':row.name}), axis=1)
hdf.df['BWhen'] = hdf.df.apply(lambda row: row.accumulation and eval(contract_spec['buy']['when'], {'row':row, 'math':math,'hist':hist,'H':hdf,'T':row.name}), axis=1)
hdf.df['SQty'] = hdf.df.apply(lambda row: row.accumulation and eval(contract_spec['sell']['qty'], {'row':row, 'math':math,'hist':hist,'H':hdf,'T':row.name,'BQ':BQ,'SQ':SQ}), axis=1)
hdf.df['SAt'] = hdf.df.apply(lambda row: row.accumulation and eval(contract_spec['sell']['at'], {'row':row, 'math':math,'hist':hist,'H':hdf,'T':row.name}), axis=1)
hdf.df['SWhen'] = hdf.df.apply(lambda row: row.accumulation and eval(contract_spec['sell']['when'],{'row':row, 'math':math,'hist':hist,'H':hdf,'T':row.name}), axis=1)
#cashflow
hdf.df['BCF'] = hdf.df.apply(lambda row: row.accumulation and row.BWhen and - (row.BQty * row.BAt),axis=1)
hdf.df['SCF'] = hdf.df.apply(lambda row: row.accumulation and row.SWhen and + (row.SQty * row.SAt),axis=1)
#cumul
    hdf.df['CumQty'] = hdf.df.apply(lambda row: (row.BWhen and row.BQty) - (row.SWhen and row.SQty), axis=1).cumsum()  # parentheses matter: buys add, sells subtract; False counts as 0
hdf.df['CumCF'] = hdf.df.apply(lambda row: row.BCF + row.SCF, axis=1).cumsum()
return hdf
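# note on the eval() namespaces above: each contract string is evaluated against
# 'row' (the current date's row), 'T' (its date), 'H' (the Hdf wrapper), plus
# 'hist', 'math' and the scenario quantities BQ/SQ; e.g. the knock-in rule
# 'row.AdjClose > 145' simply reads that row's simulated close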
def get_lastCumCF_array():
lastCumCF_array = []
    for ipath in range(noPaths):
        rpathx = rpath[ipath]
        if verbose: print('min/max/avg path px', min(rpathx), max(rpathx), statistics.mean(rpathx))
        futdf = pd.DataFrame({'AdjClose': rpathx}, index=futdt)
        fulldf = pd.concat([yh, futdf])
        # simulate the accumulation on the combined hist+fut series
        hdf0 = Hdf()
        hdf0.load_df(fulldf)
hdf = compute1_noncumul(hdf0,contract_spec)
accumulation_occurs = hdf.df.loc[hdf.df.accumulation].shape[0] > 0
if accumulation_occurs:
acc_a = hdf.df.loc[hdf.df.accumulation].head(1).index.values[0]
acc_z = hdf.df.loc[hdf.df.accumulation].tail(1).index.values[0]
            if verbose: print('Accumulation', 'begins on:', acc_a, 'ends on', acc_z)
else:
if verbose: print('Accumulation begins/ends: NEVER')
hdf = compute2_cumul(hdf,contract_spec)
# hdf.df
lastCumCF=hdf.df["CumCF"].iloc[-1]
avgCumCF=hdf.df["CumCF"].mean()
if verbose: print('ipath',ipath,'CumCF',lastCumCF,'avgCumCF',avgCumCF)
lastCumCF_array.append(lastCumCF)
if False:
plt.plot(hdf.df["CumCF"]) #, df["Y"])
plt.show()
return lastCumCF_array
execution
assert max(True,False)==True and min(True,False)==False
# sanity check: booleans order as 0/1, which the boolean arithmetic below relies on
lastdate=yh.index[-1]
futdt = get_future_dates(lastdate,0)
summary['fut_firstdt'] = futdt[0]
summary['fut_lastdt'] = futdt[-1]
print_summary()
hist_filename => M:\DEV\github__a_moscatelli\repositories\home\am-wiki-assets\equityaccumulator\JPM.csv
hist_shape => (250, 6)
hist_firstdt => 2022-05-31
hist_lastdt => 2023-05-26
hist_stats => {'sigma': 0.017308744241388962, 'mu': 0.0002726544013365923, 'dretCount': 249}
paths => 252
hist_lastclose => 136.940002
futprices => 252
exec_start_rand => 2023-09-05 11:48:22.049632
contract_specs => contract.yml
exec_start_simul => 2023-09-05 11:48:23.954958
fut_firstdt => 2023-05-29
fut_lastdt => 2024-05-14
prsdata = pd.DataFrame(None, columns = ["BQ", "SQ","P_Loss","MaxCumCF","MinCumCF"], index=None)
sample_histog_data = None
for prs in range(20):
BQ = np.random.randint(1, 10) # (inclusive,exclusive)
SQ = np.random.randint(1, 10) # (inclusive,exclusive)
print('going to start pricing '+str(noPaths)+' paths with prescriptive scenario',prs,'BQ/SQ',(BQ,SQ),'time:',datetime.now().strftime('%Y-%m-%d-%H:%M:%S'))
# MAP
lastCumCF_array = get_lastCumCF_array()
# REDUCE
    sample_histog_data = lastCumCF_array  # kept for the optional histogram below (holds the last scenario's results)
Ploss = len(list(filter(lambda cumcf: cumcf < 0, lastCumCF_array))) / len(lastCumCF_array)
max_lastCumCF_array = max(lastCumCF_array)
min_lastCumCF_array = min(lastCumCF_array)
prsdata.loc[len(prsdata)] = [BQ, SQ, Ploss, max_lastCumCF_array, min_lastCumCF_array]
going to start pricing 252 paths with prescriptive scenario 0 BQ/SQ (2, 4) time: 2023-09-05-11:48:24
going to start pricing 252 paths with prescriptive scenario 1 BQ/SQ (9, 5) time: 2023-09-05-11:49:36
going to start pricing 252 paths with prescriptive scenario 2 BQ/SQ (3, 9) time: 2023-09-05-11:50:53
going to start pricing 252 paths with prescriptive scenario 3 BQ/SQ (7, 4) time: 2023-09-05-11:52:03
going to start pricing 252 paths with prescriptive scenario 4 BQ/SQ (2, 3) time: 2023-09-05-11:53:13
going to start pricing 252 paths with prescriptive scenario 5 BQ/SQ (1, 7) time: 2023-09-05-11:54:24
going to start pricing 252 paths with prescriptive scenario 6 BQ/SQ (9, 2) time: 2023-09-05-11:55:35
going to start pricing 252 paths with prescriptive scenario 7 BQ/SQ (5, 8) time: 2023-09-05-11:56:46
going to start pricing 252 paths with prescriptive scenario 8 BQ/SQ (8, 9) time: 2023-09-05-11:57:57
going to start pricing 252 paths with prescriptive scenario 9 BQ/SQ (8, 7) time: 2023-09-05-11:59:07
going to start pricing 252 paths with prescriptive scenario 10 BQ/SQ (8, 3) time: 2023-09-05-12:00:17
going to start pricing 252 paths with prescriptive scenario 11 BQ/SQ (9, 1) time: 2023-09-05-12:01:30
going to start pricing 252 paths with prescriptive scenario 12 BQ/SQ (6, 4) time: 2023-09-05-12:02:46
going to start pricing 252 paths with prescriptive scenario 13 BQ/SQ (8, 6) time: 2023-09-05-12:04:12
going to start pricing 252 paths with prescriptive scenario 14 BQ/SQ (5, 1) time: 2023-09-05-12:05:36
going to start pricing 252 paths with prescriptive scenario 15 BQ/SQ (3, 7) time: 2023-09-05-12:06:47
going to start pricing 252 paths with prescriptive scenario 16 BQ/SQ (3, 9) time: 2023-09-05-12:07:58
going to start pricing 252 paths with prescriptive scenario 17 BQ/SQ (7, 4) time: 2023-09-05-12:09:16
going to start pricing 252 paths with prescriptive scenario 18 BQ/SQ (9, 7) time: 2023-09-05-12:10:32
going to start pricing 252 paths with prescriptive scenario 19 BQ/SQ (6, 8) time: 2023-09-05-12:11:46
summary['exec_end'] = datetime.now()
if False:
plt.hist(sample_histog_data, density=True, bins=30)
plt.title("Distribution of Accounting P/L "+str(prs))
None  # suppress the cell's implicit output
print_summary()
hist_filename => M:\DEV\github__a_moscatelli\repositories\home\am-wiki-assets\equityaccumulator\JPM.csv
hist_shape => (250, 6)
hist_firstdt => 2022-05-31
hist_lastdt => 2023-05-26
hist_stats => {'sigma': 0.017308744241388962, 'mu': 0.0002726544013365923, 'dretCount': 249}
paths => 252
hist_lastclose => 136.940002
futprices => 252
exec_start_rand => 2023-09-05 11:48:22.049632
contract_specs => contract.yml
exec_start_simul => 2023-09-05 11:48:23.954958
fut_firstdt => 2023-05-29
fut_lastdt => 2024-05-14
exec_end => 2023-09-05 12:13:04.989987
import json
devnull = contract_spec.pop('offline', None)  # print the contract specs, excluding the 'offline' key
print(json.dumps(contract_spec, indent=4))
{
"contract": "CC1",
"file": "jpm.csv",
"market": "NYSE",
"desc": "first cocoa. t = date of last close",
"knock-in": "row.AdjClose > 145",
"knock-out": "row.AdjClose <= 120",
"dates": {
"startdate": "2022-06-02",
"enddate": "2024-05-22",
"dateformat": "%Y-%m-%d",
"filedateformat": "%Y-%m-%d"
},
"comments": "can use min, max, avg, abs, math.ceil(), ...",
"missing": "forward linear",
"buy": {
"qty": "BQ",
"at": "hist('AdjClose',T,0,H)",
"when": "hist('AdjClose',T,0,H) > hist('AdjClose',T,-1,H)"
},
"sell": {
"qty": "SQ",
"at": "hist('AdjClose',T,0,H)",
"when": "hist('AdjClose',T,0,H) < hist('AdjClose',T,-1,H)",
"desc": "if 2% up or more, sell"
}
}
prsdata
|   | BQ | SQ | P_Loss | MaxCumCF | MinCumCF |
|---|---|---|---|---|---|
| 0 | 2.0 | 4.0 | 0.000000 | 50121.170219 | 0.000000 |
| 1 | 9.0 | 5.0 | 0.535714 | 2748.737467 | -85903.633487 |
| 2 | 3.0 | 9.0 | 0.000000 | 132204.524247 | 0.000000 |
| 3 | 7.0 | 4.0 | 0.535714 | 2457.155996 | -65206.350738 |
| 4 | 2.0 | 3.0 | 0.019841 | 31113.580580 | -2010.434897 |
| 5 | 1.0 | 7.0 | 0.000000 | 120098.533306 | 0.000000 |
| 6 | 9.0 | 2.0 | 0.543651 | 873.607231 | -129308.466805 |
| 7 | 5.0 | 8.0 | 0.011905 | 87287.746269 | -3055.383862 |
| 8 | 8.0 | 9.0 | 0.107143 | 67431.553401 | -19865.959868 |
| 9 | 8.0 | 7.0 | 0.392857 | 29416.374122 | -43441.160708 |
| 10 | 8.0 | 3.0 | 0.543651 | 1310.410846 | -97257.408772 |
| 11 | 9.0 | 1.0 | 0.543651 | 436.803615 | -143776.744578 |
| 12 | 6.0 | 4.0 | 0.523810 | 3747.986108 | -47732.051820 |
| 13 | 8.0 | 6.0 | 0.500000 | 10408.784483 | -55562.105740 |
| 14 | 5.0 | 1.0 | 0.543651 | 436.803615 | -73445.623533 |
| 15 | 3.0 | 7.0 | 0.000000 | 94189.344968 | 0.000000 |
| 16 | 3.0 | 9.0 | 0.000000 | 132204.524247 | 0.000000 |
| 17 | 7.0 | 4.0 | 0.535714 | 2457.155996 | -65206.350738 |
| 18 | 9.0 | 7.0 | 0.476190 | 16461.779953 | -59477.132699 |
| 19 | 6.0 | 8.0 | 0.035714 | 74333.152100 | -9972.711451 |
print(prsdata[prsdata.P_Loss == prsdata.P_Loss.min()])
      BQ   SQ  P_Loss       MaxCumCF  MinCumCF
0    2.0  4.0     0.0   50121.170219       0.0
2    3.0  9.0     0.0  132204.524247       0.0
5    1.0  7.0     0.0  120098.533306       0.0
15   3.0  7.0     0.0   94189.344968       0.0
16   3.0  9.0     0.0  132204.524247       0.0
ax = prsdata.plot(kind='scatter',x='BQ',y='SQ',c='P_Loss',title='PLoss(BQ,SQ)')
ax.set(xlabel="buy qty", ylabel="sell qty")
None