Recently I was following a paper whose example used Pyfolio, an awesome performance and risk analysis library in Python developed by Quantopian Inc. Since Quantopian shut down, nobody is maintaining the library, and I ran into a few errors, so I figured I would outline the solutions below in case anyone else hits the same issues. Before I dive too deep into modifying the library, I should note that you may be better off simply uninstalling Pyfolio and installing Pyfolio-reloaded instead; but that is not the purpose of this article.
Today I want to discuss Pyfolio's output. It was written to render inside a Jupyter Notebook, which no real programmer uses, and if you send the output to the console instead, the formatting is horrible and all over the place. So I ended up rewriting some of the Pyfolio files so that running create_full_tear_sheet() generates an HTML file you can open later for analysis.
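For reference, here is a minimal sketch of how I run it. This assumes you have dropped the rewritten files below into your local pyfolio installation and that you have a daily, noncumulative returns series; the random returns here are just a placeholder to exercise the tear sheet.

import numpy as np
import pandas as pd
import pyfolio as pf

# Placeholder daily returns; swap in your strategy's actual return series.
dates = pd.date_range("2020-01-01", "2023-12-31", freq="B")
returns = pd.Series(np.random.normal(0.0005, 0.01, len(dates)), index=dates)

# With the rewritten tears.py, the individual tables and plots are written to
# ./plots/temp and then stitched into a single HTML file under ./plots.
pf.create_full_tear_sheet(returns)

Here is a sample of the output.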
Sample Output
tears.py
#
# Copyright 2019 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from time import time
import empyrical as ep
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.display import display, Markdown
import os
import glob
import datetime
import base64
from . import capacity
from . import perf_attrib
from . import plotting
from . import pos
from . import round_trips
from . import timeseries
from . import txn
from . import utils
import logging
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
FACTOR_PARTITIONS = {
"style": [
"momentum",
"size",
"value",
"reversal_short_term",
"volatility",
],
"sector": [
"basic_materials",
"consumer_cyclical",
"financial_services",
"real_estate",
"consumer_defensive",
"health_care",
"utilities",
"communication_services",
"energy",
"industrials",
"technology",
],
}
def save_plot(fig_or_ax, plot_name, directory="./plots/temp"):
"""Save a matplotlib figure or axes object."""
logging.info(f'Saving {plot_name} to {directory}')
if not os.path.exists(directory):
os.makedirs(directory)
filepath = os.path.join(directory, f"{plot_name}.png")
if isinstance(fig_or_ax, plt.Figure):
fig_or_ax.savefig(filepath)
else:
fig_or_ax.get_figure().savefig(filepath)
print(f"Plot saved: {filepath}")
def timer(msg_body, previous_time):
current_time = time()
run_time = current_time - previous_time
message = "\nFinished " + msg_body + " (required {:.2f} seconds)."
print(message.format(run_time))
return current_time
def create_full_tear_sheet(
returns,
positions=None,
transactions=None,
market_data=None,
benchmark_rets=None,
slippage=None,
live_start_date=None,
sector_mappings=None,
round_trips=False,
estimate_intraday="infer",
hide_positions=False,
cone_std=(1.0, 1.5, 2.0),
bootstrap=False,
unadjusted_returns=None,
turnover_denom="AGB",
set_context=True,
factor_returns=None,
factor_loadings=None,
pos_in_dollars=True,
header_rows=None,
factor_partitions=FACTOR_PARTITIONS,
):
"""
Generate a number of tear sheets that are useful
for analyzing a strategy's performance.
- Fetches benchmarks if needed.
- Creates tear sheets for returns, and significant events.
If possible, also creates tear sheets for position analysis
and transaction analysis.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- Time series with decimal returns.
- Example:
2015-07-16 -0.012143
2015-07-17 0.045350
2015-07-20 0.030957
2015-07-21 0.004902
positions : pd.DataFrame, optional
Daily net position values.
- Time series of dollar amount invested in each position and cash.
- Days where stocks are not held can be represented by 0 or NaN.
- Non-working capital is labelled 'cash'
- Example:
index 'AAPL' 'MSFT' cash
2004-01-09 13939.3800 -14012.9930 711.5585
2004-01-12 14492.6300 -14624.8700 27.1821
2004-01-13 -13853.2800 13653.6400 -43.6375
transactions : pd.DataFrame, optional
Executed trade volumes and fill prices.
- One row per trade.
- Trades on different names that occur at the
          same time will have identical indices.
- Example:
index amount price symbol
2004-01-09 12:18:01 483 324.12 'AAPL'
2004-01-09 12:18:01 122 83.10 'MSFT'
2004-01-13 14:12:23 -75 340.43 'AAPL'
market_data : pd.DataFrame, optional
        Daily market data.
        - DataFrame with a MultiIndex: one level is dates, the other is the
          market_data field ('volume' and 'price'); equities are the columns.
slippage : int/float, optional
Basis points of slippage to apply to returns before generating
tearsheet stats and plots.
If a value is provided, slippage parameter sweep
plots will be generated from the unadjusted returns.
Transactions and positions must also be passed.
- See txn.adjust_returns_for_slippage for more details.
live_start_date : datetime, optional
The point in time when the strategy began live trading,
after its backtest period. This datetime should be normalized.
hide_positions : bool, optional
If True, will not output any symbol names.
round_trips: boolean, optional
If True, causes the generation of a round trip tear sheet.
sector_mappings : dict or pd.Series, optional
Security identifier to sector mapping.
Security ids as keys, sectors as values.
estimate_intraday: boolean or str, optional
Instead of using the end-of-day positions, use the point in the day
where we have the most $ invested. This will adjust positions to
better approximate and represent how an intraday strategy behaves.
By default, this is 'infer', and an attempt will be made to detect
an intraday strategy. Specifying this value will prevent detection.
cone_std : float, or tuple, optional
        If float, the standard deviation to use for the cone plots.
        If tuple, the tuple of standard deviation values to use for the cone plots.
- The cone is a normal distribution with this standard deviation
centered around a linear regression.
bootstrap : boolean (optional)
Whether to perform bootstrap analysis for the performance
metrics. Takes a few minutes longer.
turnover_denom : str
Either AGB or portfolio_value, default AGB.
- See full explanation in txn.get_turnover.
factor_returns : pd.Dataframe, optional
Returns by factor, with date as index and factors as columns
factor_loadings : pd.Dataframe, optional
Factor loadings for all days in the date range, with date and
ticker as index, and factors as columns.
pos_in_dollars : boolean, optional
        Indicates whether positions are in dollars.
header_rows : dict or OrderedDict, optional
Extra rows to display at the top of the perf stats table.
set_context : boolean, optional
If True, set default plotting style context.
- See plotting.context().
factor_partitions : dict, optional
dict specifying how factors should be separated in perf attrib
factor returns and risk exposures plots
- See create_perf_attrib_tear_sheet().
"""
if (
(unadjusted_returns is None)
and (slippage is not None)
and (transactions is not None)
):
unadjusted_returns = returns.copy()
returns = txn.adjust_returns_for_slippage(
returns, positions, transactions, slippage
)
positions = utils.check_intraday(
estimate_intraday, returns, positions, transactions
)
create_returns_tear_sheet(
returns,
positions=positions,
transactions=transactions,
live_start_date=live_start_date,
cone_std=cone_std,
benchmark_rets=benchmark_rets,
bootstrap=bootstrap,
turnover_denom=turnover_denom,
header_rows=header_rows,
set_context=set_context,
)
create_interesting_times_tear_sheet(
returns, benchmark_rets=benchmark_rets, set_context=set_context
)
if positions is not None:
create_position_tear_sheet(
returns,
positions,
hide_positions=hide_positions,
set_context=set_context,
sector_mappings=sector_mappings,
estimate_intraday=False,
)
if transactions is not None:
create_txn_tear_sheet(
returns,
positions,
transactions,
unadjusted_returns=unadjusted_returns,
estimate_intraday=False,
set_context=set_context,
)
if round_trips:
create_round_trip_tear_sheet(
returns=returns,
positions=positions,
transactions=transactions,
sector_mappings=sector_mappings,
estimate_intraday=False,
)
if market_data is not None:
create_capacity_tear_sheet(
returns,
positions,
transactions,
market_data,
liquidation_daily_vol_limit=0.2,
last_n_days=125,
estimate_intraday=False,
)
if factor_returns is not None and factor_loadings is not None:
create_perf_attrib_tear_sheet(
returns,
positions,
factor_returns,
factor_loadings,
transactions,
pos_in_dollars=pos_in_dollars,
factor_partitions=factor_partitions,
)
# Location where the individual HTML files are saved
html_files_dir = './plots/temp'
plots_dir = './plots/temp' # Directory where PNG files are saved
# Aggregate HTML content from tables
aggregated_html_content = ''
for html_file in glob.glob(os.path.join(html_files_dir, "*.html")):
with open(html_file, 'r') as file:
aggregated_html_content += file.read() + '<br><hr><br>'
# Embed PNG files into HTML content
for png_file in glob.glob(os.path.join(plots_dir, "*.png")):
with open(png_file, "rb") as image_file:
encoded_string = base64.b64encode(image_file.read()).decode()
img_tag = f'<img src="data:image/png;base64,{encoded_string}" style="width:100%"><br><hr><br>'
aggregated_html_content += img_tag
# Save the aggregated content to a new HTML file
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f")
    aggregated_filename = f"full_tearsheet_{timestamp}.html"
aggregated_file_path = os.path.join('./plots', aggregated_filename)
with open(aggregated_file_path, 'w') as file:
file.write(aggregated_html_content)
logging.info(f"Aggregated tearsheet saved to {aggregated_file_path}")
# Delete individual HTML files to avoid duplication in the future
for html_file in glob.glob(os.path.join(html_files_dir, "*.html")):
os.remove(html_file)
@plotting.customize
def create_simple_tear_sheet(
returns,
positions=None,
transactions=None,
benchmark_rets=None,
slippage=None,
estimate_intraday="infer",
live_start_date=None,
turnover_denom="AGB",
header_rows=None,
):
"""
Simpler version of create_full_tear_sheet; generates summary performance
statistics and important plots as a single image.
- Plots: cumulative returns, rolling beta, rolling Sharpe, underwater,
exposure, top 10 holdings, total holdings, long/short holdings,
daily turnover, transaction time distribution.
- Never accept market_data input (market_data = None)
- Never accept sector_mappings input (sector_mappings = None)
- Never perform bootstrap analysis (bootstrap = False)
    - Never hide positions on top 10 holdings plot (hide_positions = False)
- Always use default cone_std (cone_std = (1.0, 1.5, 2.0))
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- Time series with decimal returns.
- Example:
2015-07-16 -0.012143
2015-07-17 0.045350
2015-07-20 0.030957
2015-07-21 0.004902
positions : pd.DataFrame, optional
Daily net position values.
- Time series of dollar amount invested in each position and cash.
- Days where stocks are not held can be represented by 0 or NaN.
- Non-working capital is labelled 'cash'
- Example:
index 'AAPL' 'MSFT' cash
2004-01-09 13939.3800 -14012.9930 711.5585
2004-01-12 14492.6300 -14624.8700 27.1821
2004-01-13 -13853.2800 13653.6400 -43.6375
transactions : pd.DataFrame, optional
Executed trade volumes and fill prices.
- One row per trade.
- Trades on different names that occur at the
          same time will have identical indices.
- Example:
index amount price symbol
2004-01-09 12:18:01 483 324.12 'AAPL'
2004-01-09 12:18:01 122 83.10 'MSFT'
2004-01-13 14:12:23 -75 340.43 'AAPL'
benchmark_rets : pd.Series, optional
Daily returns of the benchmark, noncumulative.
slippage : int/float, optional
Basis points of slippage to apply to returns before generating
tearsheet stats and plots.
If a value is provided, slippage parameter sweep
plots will be generated from the unadjusted returns.
Transactions and positions must also be passed.
- See txn.adjust_returns_for_slippage for more details.
live_start_date : datetime, optional
The point in time when the strategy began live trading,
after its backtest period. This datetime should be normalized.
turnover_denom : str, optional
Either AGB or portfolio_value, default AGB.
- See full explanation in txn.get_turnover.
header_rows : dict or OrderedDict, optional
Extra rows to display at the top of the perf stats table.
set_context : boolean, optional
If True, set default plotting style context.
"""
positions = utils.check_intraday(
estimate_intraday, returns, positions, transactions
)
if (slippage is not None) and (transactions is not None):
returns = txn.adjust_returns_for_slippage(
returns, positions, transactions, slippage
)
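    # Count the chart rows the GridSpec will need: four rows are always drawn,
    # and each optional input (benchmark, positions, transactions, live date) adds more.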
always_sections = 4
positions_sections = 4 if positions is not None else 0
transactions_sections = 2 if transactions is not None else 0
live_sections = 1 if live_start_date is not None else 0
benchmark_sections = 1 if benchmark_rets is not None else 0
vertical_sections = sum(
[
always_sections,
positions_sections,
transactions_sections,
live_sections,
benchmark_sections,
]
)
if live_start_date is not None:
live_start_date = ep.utils.get_utc_timestamp(live_start_date)
plotting.show_perf_stats(
returns,
benchmark_rets,
positions=positions,
transactions=transactions,
turnover_denom=turnover_denom,
live_start_date=live_start_date,
header_rows=header_rows,
)
fig = plt.figure(figsize=(14, vertical_sections * 6))
gs = gridspec.GridSpec(vertical_sections, 3, wspace=0.5, hspace=0.5)
ax_rolling_returns = plt.subplot(gs[:2, :])
i = 2
if benchmark_rets is not None:
ax_rolling_beta = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
ax_rolling_sharpe = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
ax_underwater = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
plotting.plot_rolling_returns(
returns,
factor_returns=benchmark_rets,
live_start_date=live_start_date,
cone_std=(1.0, 1.5, 2.0),
ax=ax_rolling_returns,
)
ax_rolling_returns.set_title("Cumulative returns")
if benchmark_rets is not None:
plotting.plot_rolling_beta(returns, benchmark_rets, ax=ax_rolling_beta)
plotting.plot_rolling_sharpe(returns, ax=ax_rolling_sharpe)
plotting.plot_drawdown_underwater(returns, ax=ax_underwater)
if positions is not None:
# Plot simple positions tear sheet
ax_exposures = plt.subplot(gs[i, :])
i += 1
ax_top_positions = plt.subplot(gs[i, :], sharex=ax_exposures)
i += 1
ax_holdings = plt.subplot(gs[i, :], sharex=ax_exposures)
i += 1
ax_long_short_holdings = plt.subplot(gs[i, :])
i += 1
positions_alloc = pos.get_percent_alloc(positions)
plotting.plot_exposures(returns, positions, ax=ax_exposures)
plotting.show_and_plot_top_positions(
returns,
positions_alloc,
show_and_plot=0,
hide_positions=False,
ax=ax_top_positions,
)
plotting.plot_holdings(returns, positions_alloc, ax=ax_holdings)
plotting.plot_long_short_holdings(
returns, positions_alloc, ax=ax_long_short_holdings
)
if transactions is not None:
# Plot simple transactions tear sheet
ax_turnover = plt.subplot(gs[i, :])
i += 1
ax_txn_timings = plt.subplot(gs[i, :])
i += 1
plotting.plot_turnover(
returns,
transactions,
positions,
turnover_denom=turnover_denom,
ax=ax_turnover,
)
plotting.plot_txn_time_hist(transactions, ax=ax_txn_timings)
for ax in fig.axes:
ax.tick_params(
axis="x",
which="major",
bottom=True,
top=False,
labelbottom=True,
)
@plotting.customize
def create_returns_tear_sheet(
returns,
positions=None,
transactions=None,
live_start_date=None,
cone_std=(1.0, 1.5, 2.0),
benchmark_rets=None,
bootstrap=False,
turnover_denom="AGB",
header_rows=None,
return_fig=False,
):
"""
Generate a number of plots for analyzing a strategy's returns.
- Fetches benchmarks, then creates the plots on a single figure.
- Plots: rolling returns (with cone), rolling beta, rolling sharpe,
rolling Fama-French risk factors, drawdowns, underwater plot, monthly
and annual return plots, daily similarity plots,
and return quantile box plot.
- Will also print the start and end dates of the strategy,
performance statistics, drawdown periods, and the return range.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in create_full_tear_sheet.
positions : pd.DataFrame, optional
Daily net position values.
- See full explanation in create_full_tear_sheet.
transactions : pd.DataFrame, optional
Executed trade volumes and fill prices.
- See full explanation in create_full_tear_sheet.
live_start_date : datetime, optional
The point in time when the strategy began live trading,
after its backtest period.
cone_std : float, or tuple, optional
        If float, the standard deviation to use for the cone plots.
        If tuple, the tuple of standard deviation values to use for the cone plots.
- The cone is a normal distribution with this standard deviation
centered around a linear regression.
benchmark_rets : pd.Series, optional
Daily noncumulative returns of the benchmark.
- This is in the same style as returns.
bootstrap : boolean, optional
Whether to perform bootstrap analysis for the performance
metrics. Takes a few minutes longer.
turnover_denom : str, optional
Either AGB or portfolio_value, default AGB.
- See full explanation in txn.get_turnover.
header_rows : dict or OrderedDict, optional
Extra rows to display at the top of the perf stats table.
return_fig : boolean, optional
If True, returns the figure that was plotted on.
"""
if benchmark_rets is not None:
returns = utils.clip_returns_to_benchmark(returns, benchmark_rets)
plotting.show_perf_stats(
returns,
benchmark_rets,
positions=positions,
transactions=transactions,
turnover_denom=turnover_denom,
bootstrap=bootstrap,
live_start_date=live_start_date,
header_rows=header_rows,
)
plotting.show_worst_drawdown_periods(returns)
vertical_sections = 11
if live_start_date is not None:
vertical_sections += 1
live_start_date = ep.utils.get_utc_timestamp(live_start_date)
if benchmark_rets is not None:
vertical_sections += 1
if bootstrap:
vertical_sections += 1
fig = plt.figure(figsize=(14, vertical_sections * 6))
gs = gridspec.GridSpec(vertical_sections, 3, wspace=0.5, hspace=0.5)
ax_rolling_returns = plt.subplot(gs[:2, :])
i = 2
ax_rolling_returns_vol_match = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
ax_rolling_returns_log = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
ax_returns = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
if benchmark_rets is not None:
ax_rolling_beta = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
ax_rolling_volatility = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
ax_rolling_sharpe = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
ax_drawdown = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
ax_underwater = plt.subplot(gs[i, :], sharex=ax_rolling_returns)
i += 1
ax_monthly_heatmap = plt.subplot(gs[i, 0])
ax_annual_returns = plt.subplot(gs[i, 1])
ax_monthly_dist = plt.subplot(gs[i, 2])
i += 1
ax_return_quantiles = plt.subplot(gs[i, :])
i += 1
plotting.plot_rolling_returns(
returns,
factor_returns=benchmark_rets,
live_start_date=live_start_date,
cone_std=cone_std,
ax=ax_rolling_returns,
)
ax_rolling_returns.set_title("Cumulative returns")
plotting.plot_rolling_returns(
returns,
factor_returns=benchmark_rets,
live_start_date=live_start_date,
cone_std=None,
volatility_match=(benchmark_rets is not None),
legend_loc=None,
ax=ax_rolling_returns_vol_match,
)
ax_rolling_returns_vol_match.set_title(
"Cumulative returns volatility matched to benchmark"
)
plotting.plot_rolling_returns(
returns,
factor_returns=benchmark_rets,
logy=True,
live_start_date=live_start_date,
cone_std=cone_std,
ax=ax_rolling_returns_log,
)
ax_rolling_returns_log.set_title("Cumulative returns on logarithmic scale")
plotting.plot_returns(
returns,
live_start_date=live_start_date,
ax=ax_returns,
)
ax_returns.set_title("Returns")
if benchmark_rets is not None:
plotting.plot_rolling_beta(returns, benchmark_rets, ax=ax_rolling_beta)
plotting.plot_rolling_volatility(
returns, factor_returns=benchmark_rets, ax=ax_rolling_volatility
)
plotting.plot_rolling_sharpe(returns, ax=ax_rolling_sharpe)
# Drawdowns
plotting.plot_drawdown_periods(returns, top=5, ax=ax_drawdown)
plotting.plot_drawdown_underwater(returns=returns, ax=ax_underwater)
plotting.plot_monthly_returns_heatmap(returns, ax=ax_monthly_heatmap)
plotting.plot_annual_returns(returns, ax=ax_annual_returns)
plotting.plot_monthly_returns_dist(returns, ax=ax_monthly_dist)
plotting.plot_return_quantiles(
returns, live_start_date=live_start_date, ax=ax_return_quantiles
)
if bootstrap and (benchmark_rets is not None):
ax_bootstrap = plt.subplot(gs[i, :])
plotting.plot_perf_stats(returns, benchmark_rets, ax=ax_bootstrap)
elif bootstrap:
raise ValueError("bootstrap requires passing of benchmark_rets.")
for ax in fig.axes:
ax.tick_params(
axis="x",
which="major",
bottom=True,
top=False,
labelbottom=True,
)
    save_plot(fig,'Returns Tear Sheet')
if return_fig:
return fig
@plotting.customize
def create_position_tear_sheet(
returns,
positions,
show_and_plot_top_pos=2,
hide_positions=False,
sector_mappings=None,
transactions=None,
estimate_intraday="infer",
return_fig=False,
):
"""
Generate a number of plots for analyzing a
strategy's positions and holdings.
- Plots: gross leverage, exposures, top positions, and holdings.
- Will also print the top positions held.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in create_full_tear_sheet.
positions : pd.DataFrame
Daily net position values.
- See full explanation in create_full_tear_sheet.
show_and_plot_top_pos : int, optional
By default, this is 2, and both prints and plots the
top 10 positions.
If this is 0, it will only plot; if 1, it will only print.
hide_positions : bool, optional
If True, will not output any symbol names.
Overrides show_and_plot_top_pos to 0 to suppress text output.
sector_mappings : dict or pd.Series, optional
Security identifier to sector mapping.
Security ids as keys, sectors as values.
transactions : pd.DataFrame, optional
Prices and amounts of executed trades. One row per trade.
- See full explanation in create_full_tear_sheet.
estimate_intraday: boolean or str, optional
Approximate returns for intraday strategies.
See description in create_full_tear_sheet.
return_fig : boolean, optional
If True, returns the figure that was plotted on.
"""
positions = utils.check_intraday(
estimate_intraday, returns, positions, transactions
)
if hide_positions:
show_and_plot_top_pos = 0
vertical_sections = 7 if sector_mappings is not None else 6
fig = plt.figure(figsize=(14, vertical_sections * 6))
gs = gridspec.GridSpec(vertical_sections, 3, wspace=0.5, hspace=0.5)
ax_exposures = plt.subplot(gs[0, :])
ax_top_positions = plt.subplot(gs[1, :], sharex=ax_exposures)
ax_max_median_pos = plt.subplot(gs[2, :], sharex=ax_exposures)
ax_holdings = plt.subplot(gs[3, :], sharex=ax_exposures)
ax_long_short_holdings = plt.subplot(gs[4, :])
ax_gross_leverage = plt.subplot(gs[5, :], sharex=ax_exposures)
positions_alloc = pos.get_percent_alloc(positions)
plotting.plot_exposures(returns, positions, ax=ax_exposures)
plotting.show_and_plot_top_positions(
returns,
positions_alloc,
show_and_plot=show_and_plot_top_pos,
hide_positions=hide_positions,
ax=ax_top_positions,
)
plotting.plot_max_median_position_concentration(positions, ax=ax_max_median_pos)
plotting.plot_holdings(returns, positions_alloc, ax=ax_holdings)
plotting.plot_long_short_holdings(
returns, positions_alloc, ax=ax_long_short_holdings
)
plotting.plot_gross_leverage(returns, positions, ax=ax_gross_leverage)
if sector_mappings is not None:
sector_exposures = pos.get_sector_exposures(positions, sector_mappings)
if len(sector_exposures.columns) > 1:
sector_alloc = pos.get_percent_alloc(sector_exposures)
sector_alloc = sector_alloc.drop("cash", axis="columns")
ax_sector_alloc = plt.subplot(gs[6, :], sharex=ax_exposures)
plotting.plot_sector_allocations(returns, sector_alloc, ax=ax_sector_alloc)
for ax in fig.axes:
ax.tick_params(
axis="x",
which="major",
bottom=True,
top=False,
labelbottom=True,
)
save_plot(fig,'Position Tear Sheet')
if return_fig:
return fig
@plotting.customize
def create_txn_tear_sheet(
returns,
positions,
transactions,
turnover_denom="AGB",
unadjusted_returns=None,
estimate_intraday="infer",
return_fig=False,
):
"""
Generate a number of plots for analyzing a strategy's transactions.
Plots: turnover, daily volume, and a histogram of daily volume.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in create_full_tear_sheet.
positions : pd.DataFrame
Daily net position values.
- See full explanation in create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in create_full_tear_sheet.
turnover_denom : str, optional
Either AGB or portfolio_value, default AGB.
- See full explanation in txn.get_turnover.
unadjusted_returns : pd.Series, optional
Daily unadjusted returns of the strategy, noncumulative.
        Will plot additional slippage sweep analysis.
        - See pyfolio.plotting.plot_slippage_sweep and
          pyfolio.plotting.plot_slippage_sensitivity
estimate_intraday: boolean or str, optional
Approximate returns for intraday strategies.
See description in create_full_tear_sheet.
return_fig : boolean, optional
If True, returns the figure that was plotted on.
"""
positions = utils.check_intraday(
estimate_intraday, returns, positions, transactions
)
vertical_sections = 6 if unadjusted_returns is not None else 4
fig = plt.figure(figsize=(14, vertical_sections * 6))
gs = gridspec.GridSpec(vertical_sections, 3, wspace=0.5, hspace=0.5)
ax_turnover = plt.subplot(gs[0, :])
ax_daily_volume = plt.subplot(gs[1, :], sharex=ax_turnover)
ax_turnover_hist = plt.subplot(gs[2, :])
ax_txn_timings = plt.subplot(gs[3, :])
plotting.plot_turnover(
returns,
transactions,
positions,
turnover_denom=turnover_denom,
ax=ax_turnover,
)
plotting.plot_daily_volume(returns, transactions, ax=ax_daily_volume)
try:
plotting.plot_daily_turnover_hist(
transactions,
positions,
turnover_denom=turnover_denom,
ax=ax_turnover_hist,
)
except ValueError:
warnings.warn("Unable to generate turnover plot.", UserWarning)
plotting.plot_txn_time_hist(transactions, ax=ax_txn_timings)
if unadjusted_returns is not None:
ax_slippage_sweep = plt.subplot(gs[4, :])
plotting.plot_slippage_sweep(
unadjusted_returns, positions, transactions, ax=ax_slippage_sweep
)
ax_slippage_sensitivity = plt.subplot(gs[5, :])
plotting.plot_slippage_sensitivity(
unadjusted_returns,
positions,
transactions,
ax=ax_slippage_sensitivity,
)
for ax in fig.axes:
ax.tick_params(
axis="x",
which="major",
bottom=True,
top=False,
labelbottom=True,
)
save_plot(fig,'TXN Tear Sheet')
if return_fig:
return fig
@plotting.customize
def create_round_trip_tear_sheet(
returns,
positions,
transactions,
sector_mappings=None,
estimate_intraday="infer",
return_fig=False,
):
"""
Generate a number of figures and plots describing the duration,
frequency, and profitability of trade "round trips."
A round trip is started when a new long or short position is
opened and is only completed when the number of shares in that
position returns to or crosses zero.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in create_full_tear_sheet.
positions : pd.DataFrame
Daily net position values.
- See full explanation in create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in create_full_tear_sheet.
sector_mappings : dict or pd.Series, optional
Security identifier to sector mapping.
Security ids as keys, sectors as values.
estimate_intraday: boolean or str, optional
Approximate returns for intraday strategies.
See description in create_full_tear_sheet.
return_fig : boolean, optional
If True, returns the figure that was plotted on.
"""
positions = utils.check_intraday(
estimate_intraday, returns, positions, transactions
)
transactions_closed = round_trips.add_closing_transactions(positions, transactions)
# extract_round_trips requires BoD portfolio_value
trades = round_trips.extract_round_trips(
transactions_closed,
portfolio_value=positions.sum(axis="columns") / (1 + returns),
)
if len(trades) < 5:
warnings.warn(
"""Fewer than 5 round-trip trades made.
Skipping round trip tearsheet.""",
UserWarning,
)
return
round_trips.print_round_trip_stats(trades)
plotting.show_profit_attribution(trades)
if sector_mappings is not None:
sector_trades = round_trips.apply_sector_mappings_to_round_trips(
trades, sector_mappings
)
plotting.show_profit_attribution(sector_trades)
fig = plt.figure(figsize=(14, 3 * 6))
gs = gridspec.GridSpec(3, 2, wspace=0.5, hspace=0.5)
ax_trade_lifetimes = plt.subplot(gs[0, :])
ax_prob_profit_trade = plt.subplot(gs[1, 0])
ax_holding_time = plt.subplot(gs[1, 1])
ax_pnl_per_round_trip_dollars = plt.subplot(gs[2, 0])
ax_pnl_per_round_trip_pct = plt.subplot(gs[2, 1])
plotting.plot_round_trip_lifetimes(trades, ax=ax_trade_lifetimes)
plotting.plot_prob_profit_trade(trades, ax=ax_prob_profit_trade)
trade_holding_times = [x.days for x in trades["duration"]]
sns.histplot(trade_holding_times, ax=ax_holding_time)
ax_holding_time.set(xlabel="Holding time in days")
sns.histplot(trades.pnl, ax=ax_pnl_per_round_trip_dollars)
ax_pnl_per_round_trip_dollars.set(xlabel="PnL per round-trip trade in $")
sns.histplot(trades.returns.dropna() * 100, ax=ax_pnl_per_round_trip_pct)
ax_pnl_per_round_trip_pct.set(xlabel="Round-trip returns in %")
gs.tight_layout(fig)
save_plot(fig,'Round Trip Tear Sheet')
if return_fig:
return fig
@plotting.customize
def create_interesting_times_tear_sheet(
returns,
benchmark_rets=None,
periods=None,
legend_loc="best",
return_fig=False,
):
"""
Generate a number of returns plots around interesting points in time,
like the flash crash and 9/11.
    Plots: returns around the dotcom bubble burst, Lehman Brothers' failure,
9/11, US downgrade and EU debt crisis, Fukushima meltdown, US housing
bubble burst, EZB IR, Great Recession (August 2007, March and September
of 2008, Q1 & Q2 2009), flash crash, April and October 2014.
benchmark_rets must be passed, as it is meaningless to analyze performance
during interesting times without some benchmark to refer to.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in create_full_tear_sheet.
benchmark_rets : pd.Series
Daily noncumulative returns of the benchmark.
- This is in the same style as returns.
periods: dict or OrderedDict, optional
historical event dates that may have had significant
impact on markets
legend_loc : plt.legend_loc, optional
The legend's location.
return_fig : boolean, optional
If True, returns the figure that was plotted on.
"""
logging.info('Running create_interesting_times_tear_sheet')
rets_interesting = timeseries.extract_interesting_date_ranges(returns, periods)
if not rets_interesting:
warnings.warn(
"Passed returns do not overlap with any" "interesting times.",
UserWarning,
)
return
utils.print_table(
pd.DataFrame(rets_interesting)
.describe()
.transpose()
.loc[:, ["mean", "min", "max"]]
* 100,
name="Stress Events",
float_format="{0:.2f}%".format,
)
if benchmark_rets is not None:
returns = utils.clip_returns_to_benchmark(returns, benchmark_rets)
bmark_interesting = timeseries.extract_interesting_date_ranges(
benchmark_rets, periods
)
num_plots = len(rets_interesting)
# 2 plots, 1 row; 3 plots, 2 rows; 4 plots, 2 rows; etc.
num_rows = int((num_plots + 1) / 2.0)
fig = plt.figure(figsize=(14, num_rows * 6.0))
gs = gridspec.GridSpec(num_rows, 2, wspace=0.5, hspace=0.5)
for i, (name, rets_period) in enumerate(rets_interesting.items()):
        # Row index is i // 2, column index is i % 2 (e.g. i=0 -> (0, 0), i=1 -> (0, 1), i=2 -> (1, 0))
ax = plt.subplot(gs[int(i / 2.0), i % 2])
ep.cum_returns(rets_period).plot(
ax=ax, color="forestgreen", label="algo", alpha=0.7, lw=2
)
if benchmark_rets is not None:
ep.cum_returns(bmark_interesting[name]).plot(
ax=ax, color="gray", label="benchmark", alpha=0.6
)
ax.legend(
["Algo", "benchmark"],
loc=legend_loc,
frameon=True,
framealpha=0.5,
)
else:
ax.legend(["Algo"], loc=legend_loc, frameon=True, framealpha=0.5)
ax.set_title(name)
ax.set_ylabel("Returns")
ax.set_xlabel("")
save_plot(fig,'Interesting Times Tear Sheet')
if return_fig:
return fig
@plotting.customize
def create_capacity_tear_sheet(
returns,
positions,
transactions,
market_data,
liquidation_daily_vol_limit=0.2,
trade_daily_vol_limit=0.05,
last_n_days=utils.APPROX_BDAYS_PER_MONTH * 6,
days_to_liquidate_limit=1,
estimate_intraday="infer",
return_fig=False,
):
"""
Generates a report detailing portfolio size constraints set by
least liquid tickers. Plots a "capacity sweep," a curve describing
projected sharpe ratio given the slippage penalties that are
applied at various capital bases.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in create_full_tear_sheet.
positions : pd.DataFrame
Daily net position values.
- See full explanation in create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in create_full_tear_sheet.
market_data : pd.DataFrame
        Daily market data.
        - DataFrame with a MultiIndex: one level is dates, the other is the
          market_data field ('volume' and 'price'); equities are the columns.
liquidation_daily_vol_limit : float
Max proportion of a daily bar that can be consumed in the
process of liquidating a position in the
"days to liquidation" analysis.
trade_daily_vol_limit : float
Flag daily transaction totals that exceed proportion of
daily bar.
last_n_days : integer
Compute max position allocation and dollar volume for only
the last N days of the backtest
days_to_liquidate_limit : integer
Display all tickers with greater max days to liquidation.
estimate_intraday: boolean or str, optional
Approximate returns for intraday strategies.
See description in create_full_tear_sheet.
return_fig : boolean, optional
If True, returns the figure that was plotted on.
"""
positions = utils.check_intraday(
estimate_intraday, returns, positions, transactions
)
print(
"Max days to liquidation is computed for each traded name "
"assuming a 20% limit on daily bar consumption \n"
"and trailing 5 day mean volume as the available bar volume.\n\n"
"Tickers with >1 day liquidation time at a"
" constant $1m capital base:"
)
max_days_by_ticker = capacity.get_max_days_to_liquidate_by_ticker(
positions,
market_data,
max_bar_consumption=liquidation_daily_vol_limit,
capital_base=1e6,
mean_volume_window=5,
)
max_days_by_ticker.index = max_days_by_ticker.index.map(utils.format_asset)
print("Whole backtest:")
utils.print_table(
max_days_by_ticker[
max_days_by_ticker.days_to_liquidate > days_to_liquidate_limit
]
)
max_days_by_ticker_lnd = capacity.get_max_days_to_liquidate_by_ticker(
positions,
market_data,
max_bar_consumption=liquidation_daily_vol_limit,
capital_base=1e6,
mean_volume_window=5,
last_n_days=last_n_days,
)
max_days_by_ticker_lnd.index = max_days_by_ticker_lnd.index.map(utils.format_asset)
print("Last {} trading days:".format(last_n_days))
utils.print_table(
max_days_by_ticker_lnd[max_days_by_ticker_lnd.days_to_liquidate > 1]
)
llt = capacity.get_low_liquidity_transactions(transactions, market_data)
llt.index = llt.index.map(utils.format_asset)
print(
"Tickers with daily transactions consuming >{}% of daily bar \n"
"all backtest:".format(trade_daily_vol_limit * 100)
)
utils.print_table(llt[llt["max_pct_bar_consumed"] > trade_daily_vol_limit * 100])
llt = capacity.get_low_liquidity_transactions(
transactions, market_data, last_n_days=last_n_days
)
print("Last {} trading days:".format(last_n_days))
utils.print_table(llt[llt["max_pct_bar_consumed"] > trade_daily_vol_limit * 100])
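    # Back out the backtest's starting capital: first-day portfolio value divided by (1 + first day's return).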
bt_starting_capital = positions.iloc[0].sum() / (1 + returns.iloc[0])
fig, ax_capacity_sweep = plt.subplots(figsize=(14, 6))
plotting.plot_capacity_sweep(
returns,
transactions,
market_data,
bt_starting_capital,
min_pv=100000,
max_pv=300000000,
step_size=1000000,
ax=ax_capacity_sweep,
)
save_plot(fig,'Capacity Tear Sheet')
if return_fig:
return fig
@plotting.customize
def create_perf_attrib_tear_sheet(
returns,
positions,
factor_returns,
factor_loadings,
transactions=None,
pos_in_dollars=True,
factor_partitions=FACTOR_PARTITIONS,
return_fig=False,
):
"""
Generate plots and tables for analyzing a strategy's performance.
Parameters
----------
returns : pd.Series
Returns for each day in the date range.
positions: pd.DataFrame
Daily holdings (in dollars or percentages), indexed by date.
Will be converted to percentages if positions are in dollars.
Short positions show up as cash in the 'cash' column.
factor_returns : pd.DataFrame
Returns by factor, with date as index and factors as columns
factor_loadings : pd.DataFrame
Factor loadings for all days in the date range, with date
and ticker as index, and factors as columns.
transactions : pd.DataFrame, optional
Prices and amounts of executed trades. One row per trade.
- See full explanation in create_full_tear_sheet.
- Default is None.
pos_in_dollars : boolean, optional
Flag indicating whether `positions` are in dollars or percentages
If True, positions are in dollars.
factor_partitions : dict
dict specifying how factors should be separated in factor returns
and risk exposures plots
- Example:
{'style': ['momentum', 'size', 'value', ...],
'sector': ['technology', 'materials', ... ]}
return_fig : boolean, optional
If True, returns the figure that was plotted on.
"""
portfolio_exposures, perf_attrib_data = perf_attrib.perf_attrib(
returns,
positions,
factor_returns,
factor_loadings,
transactions,
pos_in_dollars=pos_in_dollars,
)
display(Markdown("## Performance Relative to Common Risk Factors"))
# aggregate perf attrib stats and show summary table
perf_attrib.show_perf_attrib_stats(
returns,
positions,
factor_returns,
factor_loadings,
transactions,
pos_in_dollars,
)
# one section for the returns plot, and for each factor grouping
# one section for factor returns, and one for risk exposures
if factor_partitions is not None:
vertical_sections = 1 + 2 * max(len(factor_partitions), 1)
else:
vertical_sections = 1 + 2
current_section = 0
fig = plt.figure(figsize=[14, vertical_sections * 6])
gs = gridspec.GridSpec(vertical_sections, 1, wspace=0.5, hspace=0.5)
perf_attrib.plot_returns(perf_attrib_data, ax=plt.subplot(gs[current_section]))
current_section += 1
if factor_partitions is not None:
for factor_type, partitions in factor_partitions.items():
columns_to_select = perf_attrib_data.columns.intersection(partitions)
perf_attrib.plot_factor_contribution_to_perf(
perf_attrib_data[columns_to_select],
ax=plt.subplot(gs[current_section]),
title=("Cumulative common {} returns attribution").format(factor_type),
)
current_section += 1
for factor_type, partitions in factor_partitions.items():
columns_to_select = portfolio_exposures.columns.intersection(partitions)
perf_attrib.plot_risk_exposures(
portfolio_exposures[columns_to_select],
ax=plt.subplot(gs[current_section]),
title="Daily {} factor exposures".format(factor_type),
)
current_section += 1
else:
perf_attrib.plot_factor_contribution_to_perf(
perf_attrib_data, ax=plt.subplot(gs[current_section])
)
current_section += 1
perf_attrib.plot_risk_exposures(
portfolio_exposures, ax=plt.subplot(gs[current_section])
)
# gs.tight_layout(fig)
save_plot(fig,'Perf Attribution Tear Sheet')
if return_fig:
return fig
timeseries.py
#
# Copyright 2018 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from functools import partial
import empyrical as ep
import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as stats
from sklearn import linear_model
from .deprecate import deprecated
from .interesting_periods import PERIODS
from .txn import get_turnover
from .utils import APPROX_BDAYS_PER_MONTH, APPROX_BDAYS_PER_YEAR
from .utils import DAILY
DEPRECATION_WARNING = (
"Risk functions in pyfolio.timeseries are deprecated "
"and will be removed in a future release. Please "
"install the empyrical package instead."
)
def var_cov_var_normal(P, c, mu=0, sigma=1):
"""
Variance-covariance calculation of daily Value-at-Risk in a
portfolio.
Parameters
----------
P : float
Portfolio value.
c : float
Confidence level.
mu : float, optional
        Mean.
    sigma : float, optional
        Standard deviation.
Returns
-------
float
Variance-covariance.
"""
alpha = sp.stats.norm.ppf(1 - c, mu, sigma)
return P - P * (alpha + 1)
@deprecated(msg=DEPRECATION_WARNING)
def max_drawdown(returns):
"""
Determines the maximum drawdown of a strategy.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
Returns
-------
float
Maximum drawdown.
Note
-----
See https://en.wikipedia.org/wiki/Drawdown_(economics) for more details.
"""
return ep.max_drawdown(returns)
@deprecated(msg=DEPRECATION_WARNING)
def annual_return(returns, period=DAILY):
"""
Determines the mean annual growth rate of returns.
Parameters
----------
returns : pd.Series
Periodic returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Can be 'monthly', 'weekly', or 'daily'.
- Defaults to 'daily'.
Returns
-------
float
Annual Return as CAGR (Compounded Annual Growth Rate).
"""
return ep.annual_return(returns, period=period)
@deprecated(msg=DEPRECATION_WARNING)
def annual_volatility(returns, period=DAILY):
"""
Determines the annual volatility of a strategy.
Parameters
----------
returns : pd.Series
Periodic returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing volatility. Can be 'monthly' or 'weekly' or 'daily'.
- Defaults to 'daily'.
Returns
-------
float
Annual volatility.
"""
return ep.annual_volatility(returns, period=period)
@deprecated(msg=DEPRECATION_WARNING)
def calmar_ratio(returns, period=DAILY):
"""
Determines the Calmar ratio, or drawdown ratio, of a strategy.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Can be 'monthly', 'weekly', or 'daily'.
- Defaults to 'daily'.
Returns
-------
float
Calmar ratio (drawdown ratio) as float. Returns np.nan if there is no
calmar ratio.
Note
-----
See https://en.wikipedia.org/wiki/Calmar_ratio for more details.
"""
return ep.calmar_ratio(returns, period=period)
@deprecated(msg=DEPRECATION_WARNING)
def omega_ratio(returns, annual_return_threshhold=0.0):
"""
Determines the Omega ratio of a strategy.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
annual_return_threshold : float, optional
Minimum acceptable return of the investor. Annual threshold over which
returns are considered positive or negative. It is converted to a
value appropriate for the period of the returns for this ratio.
E.g. An annual minimum acceptable return of 100 translates to a daily
minimum acceptable return of 0.01848.
(1 + 100) ** (1. / 252) - 1 = 0.01848
Daily returns must exceed this value to be considered positive. The
daily return yields the desired annual return when compounded over
the average number of business days in a year.
(1 + 0.01848) ** 252 - 1 = 99.93
- Defaults to 0.0
Returns
-------
float
Omega ratio.
Note
-----
See https://en.wikipedia.org/wiki/Omega_ratio for more details.
"""
return ep.omega_ratio(returns, required_return=annual_return_threshhold)
@deprecated(msg=DEPRECATION_WARNING)
def sortino_ratio(returns, required_return=0, period=DAILY):
"""
Determines the Sortino ratio of a strategy.
Parameters
----------
returns : pd.Series or pd.DataFrame
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
required_return: float / series
minimum acceptable return
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Can be 'monthly', 'weekly', or 'daily'.
- Defaults to 'daily'.
Returns
-------
depends on input type
series ==> float
DataFrame ==> np.array
Annualized Sortino ratio.
"""
return ep.sortino_ratio(returns, required_return=required_return)
@deprecated(msg=DEPRECATION_WARNING)
def downside_risk(returns, required_return=0, period=DAILY):
"""
Determines the downside deviation below a threshold
Parameters
----------
returns : pd.Series or pd.DataFrame
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
required_return: float / series
minimum acceptable return
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Can be 'monthly', 'weekly', or 'daily'.
- Defaults to 'daily'.
Returns
-------
depends on input type
series ==> float
DataFrame ==> np.array
Annualized downside deviation
"""
return ep.downside_risk(returns, required_return=required_return, period=period)
@deprecated(msg=DEPRECATION_WARNING)
def sharpe_ratio(returns, risk_free=0, period=DAILY):
"""
Determines the Sharpe ratio of a strategy.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
risk_free : int, float
Constant risk-free return throughout the period.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Can be 'monthly', 'weekly', or 'daily'.
- Defaults to 'daily'.
Returns
-------
float
Sharpe ratio.
np.nan
        If insufficient length of returns or if adjusted returns are 0.
Note
-----
See https://en.wikipedia.org/wiki/Sharpe_ratio for more details.
"""
return ep.sharpe_ratio(returns, risk_free=risk_free, period=period)
@deprecated(msg=DEPRECATION_WARNING)
def alpha_beta(returns, factor_returns):
"""
Calculates both alpha and beta.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
factor_returns : pd.Series
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- This is in the same style as returns.
Returns
-------
float
Alpha.
float
Beta.
"""
return ep.alpha_beta(returns, factor_returns=factor_returns)
@deprecated(msg=DEPRECATION_WARNING)
def alpha(returns, factor_returns):
"""
Calculates annualized alpha.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
factor_returns : pd.Series
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- This is in the same style as returns.
Returns
-------
float
Alpha.
"""
return ep.alpha(returns, factor_returns=factor_returns)
@deprecated(msg=DEPRECATION_WARNING)
def beta(returns, factor_returns):
"""
Calculates beta.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
factor_returns : pd.Series
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- This is in the same style as returns.
Returns
-------
float
Beta.
"""
return ep.beta(returns, factor_returns)
@deprecated(msg=DEPRECATION_WARNING)
def stability_of_timeseries(returns):
"""
Determines R-squared of a linear fit to the cumulative
log returns. Computes an ordinary least squares linear fit,
and returns R-squared.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
Returns
-------
float
R-squared.
"""
return ep.stability_of_timeseries(returns)
@deprecated(msg=DEPRECATION_WARNING)
def tail_ratio(returns):
"""
Determines the ratio between the right (95%) and left tail (5%).
For example, a ratio of 0.25 means that losses are four times
as bad as profits.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
Returns
-------
float
tail ratio
"""
return ep.tail_ratio(returns)
def common_sense_ratio(returns):
"""
Common sense ratio is the multiplication of the tail ratio and the
Gain-to-Pain-Ratio -- sum(profits) / sum(losses).
See http://bit.ly/1ORzGBk for more information on motivation of
this metric.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
Returns
-------
float
common sense ratio
"""
return ep.tail_ratio(returns) * (1 + ep.annual_return(returns))
def normalize(returns, starting_value=1):
"""
Normalizes a returns timeseries based on the first value.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
starting_value : float, optional
        The starting value (default 1).
Returns
-------
pd.Series
Normalized returns.
"""
return starting_value * (returns / returns.iloc[0])
@deprecated(msg=DEPRECATION_WARNING)
def cum_returns(returns, starting_value=0):
"""
Compute cumulative returns from simple returns.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
starting_value : float, optional
        The starting value of the cumulative returns (default 0).
Returns
-------
pandas.Series
Series of cumulative returns.
Notes
-----
For increased numerical accuracy, convert input to log returns
where it is possible to sum instead of multiplying.
"""
return ep.cum_returns(returns, starting_value=starting_value)
@deprecated(msg=DEPRECATION_WARNING)
def aggregate_returns(returns, convert_to):
"""
Aggregates returns by week, month, or year.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
convert_to : str
Can be 'weekly', 'monthly', or 'yearly'.
Returns
-------
pd.Series
Aggregated returns.
"""
return ep.aggregate_returns(returns, convert_to=convert_to)
def rolling_beta(returns, factor_returns, rolling_window=APPROX_BDAYS_PER_MONTH * 6):
"""
Determines the rolling beta of a strategy.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.Series or pd.DataFrame
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- If DataFrame is passed, computes rolling beta for each column.
- This is in the same style as returns.
rolling_window : int, optional
The size of the rolling window, in days, over which to compute
beta (default 6 months).
Returns
-------
pd.Series
Rolling beta.
Note
-----
See https://en.wikipedia.org/wiki/Beta_(finance) for more details.
"""
if factor_returns.ndim > 1:
# Apply column-wise
return factor_returns.apply(
partial(rolling_beta, returns), rolling_window=rolling_window
)
else:
out = pd.Series(index=returns.index, dtype="float64")
for beg, end in zip(
returns.index[0:-rolling_window], returns.index[rolling_window:]
):
out.loc[end] = ep.beta(returns.loc[beg:end], factor_returns.loc[beg:end])
return out
def rolling_regression(
returns,
factor_returns,
rolling_window=APPROX_BDAYS_PER_MONTH * 6,
nan_threshold=0.1,
):
"""
Computes rolling factor betas using a multivariate linear regression
(separate linear regressions is problematic because the factors may be
confounded).
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.DataFrame
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- Computes rolling beta for each column.
- This is in the same style as returns.
rolling_window : int, optional
The days window over which to compute the beta. Defaults to 6 months.
nan_threshold : float, optional
If there are more than this fraction of NaNs, the rolling regression
for the given date will be skipped.
Returns
-------
pandas.DataFrame
DataFrame containing rolling beta coefficients to SMB, HML and UMD
"""
# We need to drop NaNs to regress
ret_no_na = returns.dropna()
columns = ["alpha"] + factor_returns.columns.tolist()
rolling_risk = pd.DataFrame(columns=columns, index=ret_no_na.index)
rolling_risk.index.name = "dt"
for beg, end in zip(
ret_no_na.index[:-rolling_window], ret_no_na.index[rolling_window:]
):
returns_period = ret_no_na[beg:end]
factor_returns_period = factor_returns.loc[returns_period.index]
        # Only run the regression when every factor's NaN fraction is below the threshold
        if np.all(factor_returns_period.isnull().mean() < nan_threshold):
factor_returns_period_dnan = factor_returns_period.dropna()
reg = linear_model.LinearRegression(fit_intercept=True).fit(
factor_returns_period_dnan,
returns_period.loc[factor_returns_period_dnan.index],
)
rolling_risk.loc[end, factor_returns.columns] = reg.coef_
rolling_risk.loc[end, "alpha"] = reg.intercept_
return rolling_risk
def gross_lev(positions):
"""
Calculates the gross leverage of a strategy.
Parameters
----------
positions : pd.DataFrame
Daily net position values.
- See full explanation in tears.create_full_tear_sheet.
Returns
-------
pd.Series
Gross leverage.
"""
exposure = positions.drop("cash", axis=1).abs().sum(axis=1)
return exposure / positions.sum(axis=1)
def value_at_risk(returns, period=None, sigma=2.0):
"""
Get value at risk (VaR).
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
period : str, optional
Period over which to calculate VaR. Set to 'weekly',
'monthly', or 'yearly', otherwise defaults to period of
returns (typically daily).
sigma : float, optional
Standard deviations of VaR, default 2.
"""
if period is not None:
returns_agg = ep.aggregate_returns(returns, period)
else:
returns_agg = returns.copy()
value_at_risk = returns_agg.mean() - sigma * returns_agg.std()
return value_at_risk
SIMPLE_STAT_FUNCS = [
ep.annual_return,
ep.cum_returns_final,
ep.annual_volatility,
ep.sharpe_ratio,
ep.calmar_ratio,
ep.stability_of_timeseries,
ep.max_drawdown,
ep.omega_ratio,
ep.sortino_ratio,
stats.skew,
stats.kurtosis,
ep.tail_ratio,
value_at_risk,
]
FACTOR_STAT_FUNCS = [
ep.alpha,
ep.beta,
]
STAT_FUNC_NAMES = {
"annual_return": "Annual return",
"cum_returns_final": "Cumulative returns",
"annual_volatility": "Annual volatility",
"sharpe_ratio": "Sharpe ratio",
"calmar_ratio": "Calmar ratio",
"stability_of_timeseries": "Stability",
"max_drawdown": "Max drawdown",
"omega_ratio": "Omega ratio",
"sortino_ratio": "Sortino ratio",
"skew": "Skew",
"kurtosis": "Kurtosis",
"tail_ratio": "Tail ratio",
"common_sense_ratio": "Common sense ratio",
"value_at_risk": "Daily value at risk",
"alpha": "Alpha",
"beta": "Beta",
}
def perf_stats(
returns,
factor_returns=None,
positions=None,
transactions=None,
turnover_denom="AGB",
):
"""
Calculates various performance metrics of a strategy, for use in
plotting.show_perf_stats.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.Series, optional
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- This is in the same style as returns.
- If None, do not compute alpha, beta, and information ratio.
positions : pd.DataFrame
Daily net position values.
- See full explanation in tears.create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in tears.create_full_tear_sheet.
turnover_denom : str
Either AGB or portfolio_value, default AGB.
- See full explanation in txn.get_turnover.
Returns
-------
pd.Series
Performance metrics.
"""
stats = pd.Series(dtype="float64")
for stat_func in SIMPLE_STAT_FUNCS:
stats[STAT_FUNC_NAMES[stat_func.__name__]] = stat_func(returns)
if not (positions is None or positions.empty):
stats["Gross leverage"] = gross_lev(positions).mean()
if not (transactions is None or transactions.empty):
stats["Daily turnover"] = get_turnover(
positions, transactions, turnover_denom
).mean()
if factor_returns is not None:
for stat_func in FACTOR_STAT_FUNCS:
res = stat_func(returns, factor_returns)
stats[STAT_FUNC_NAMES[stat_func.__name__]] = res
return stats
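# Usage sketch (hypothetical inputs; `rets` and `benchmark_rets` are assumed
# daily return Series): without positions/transactions only the
# SIMPLE_STAT_FUNCS rows are filled, plus Alpha/Beta when a benchmark is given.
#
#   stats = perf_stats(rets, factor_returns=benchmark_rets)
#   print(stats["Sharpe ratio"], stats["Max drawdown"], stats["Beta"])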
def perf_stats_bootstrap(returns, factor_returns=None, return_stats=True, **kwargs):
"""Calculates various bootstrapped performance metrics of a strategy.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.Series, optional
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- This is in the same style as returns.
- If None, do not compute alpha, beta, and information ratio.
return_stats : boolean (optional)
If True, returns a DataFrame of mean, median, 5 and 95 percentiles
for each perf metric.
If False, returns a DataFrame with the bootstrap samples for
each perf metric.
Returns
-------
pd.DataFrame
if return_stats is True:
- Distributional statistics of bootstrapped sampling
distribution of performance metrics.
if return_stats is False:
- Bootstrap samples for each performance metric.
"""
bootstrap_values = OrderedDict()
for stat_func in SIMPLE_STAT_FUNCS:
stat_name = STAT_FUNC_NAMES[stat_func.__name__]
bootstrap_values[stat_name] = calc_bootstrap(stat_func, returns)
if factor_returns is not None:
for stat_func in FACTOR_STAT_FUNCS:
stat_name = STAT_FUNC_NAMES[stat_func.__name__]
bootstrap_values[stat_name] = calc_bootstrap(
stat_func, returns, factor_returns=factor_returns
)
bootstrap_values = pd.DataFrame(bootstrap_values)
if return_stats:
stats = bootstrap_values.apply(calc_distribution_stats)
return stats.T[["mean", "median", "5%", "95%"]]
else:
return bootstrap_values
def calc_bootstrap(func, returns, *args, **kwargs):
"""Performs a bootstrap analysis on a user-defined function returning
a summary statistic.
Parameters
----------
func : function
Function that either takes a single array (commonly returns)
or two arrays (commonly returns and factor returns) and
returns a single value (commonly a summary
statistic). Additional args and kwargs are passed as well.
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.Series, optional
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- This is in the same style as returns.
n_samples : int, optional
Number of bootstrap samples to draw. Default is 1000.
Increasing this will lead to more stable / accurate estimates.
Returns
-------
numpy.ndarray
Bootstrapped sampling distribution of passed in func.
"""
n_samples = kwargs.pop("n_samples", 1000)
out = np.empty(n_samples)
factor_returns = kwargs.pop("factor_returns", None)
for i in range(n_samples):
idx = np.random.randint(len(returns), size=len(returns))
returns_i = returns.iloc[idx].reset_index(drop=True)
if factor_returns is not None:
factor_returns_i = factor_returns.iloc[idx].reset_index(drop=True)
out[i] = func(returns_i, factor_returns_i, *args, **kwargs)
else:
out[i] = func(returns_i, *args, **kwargs)
return out
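# Usage sketch (hypothetical): bootstrap the sampling distribution of the
# Sharpe ratio and look at a rough 90% interval of the resampled estimates.
#
#   sharpe_dist = calc_bootstrap(ep.sharpe_ratio, rets, n_samples=500)
#   np.percentile(sharpe_dist, [5, 95])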
def calc_distribution_stats(x):
"""Calculate various summary statistics of data.
Parameters
----------
x : numpy.ndarray or pandas.Series
Array to compute summary statistics for.
Returns
-------
pandas.Series
Series containing mean, median, std, as well as 5, 25, 75 and
95 percentiles of passed in values.
"""
return pd.Series(
{
"mean": np.mean(x),
"median": np.median(x),
"std": np.std(x),
"5%": np.percentile(x, 5),
"25%": np.percentile(x, 25),
"75%": np.percentile(x, 75),
"95%": np.percentile(x, 95),
"IQR": np.subtract.reduce(np.percentile(x, [75, 25])),
}
)
def get_max_drawdown_underwater(underwater):
"""
Determines peak, valley, and recovery dates given an 'underwater'
DataFrame.
An underwater DataFrame is a DataFrame that has precomputed
rolling drawdown.
Parameters
----------
underwater : pd.Series
Underwater returns (rolling drawdown) of a strategy.
Returns
-------
peak : datetime
The maximum drawdown's peak.
valley : datetime
The maximum drawdown's valley.
recovery : datetime
The maximum drawdown's recovery.
"""
valley = underwater.idxmin() # end of the period
# Peak: the last date the drawdown series touched zero before the valley
peak = underwater[:valley][underwater[:valley] == 0].index[-1]
# Recovery: the first date it returns to zero after the valley
try:
recovery = underwater[valley:][underwater[valley:] == 0].index[0]
except IndexError:
recovery = np.nan # drawdown not recovered
return peak, valley, recovery
def get_max_drawdown(returns):
"""
Determines the maximum drawdown of a strategy.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~pyfolio.timeseries.cum_returns`.
Returns
-------
peak : datetime
The maximum drawdown's peak.
valley : datetime
The maximum drawdown's valley.
recovery : datetime
The maximum drawdown's recovery (NaN if the drawdown has not recovered).
Note
-----
See https://en.wikipedia.org/wiki/Drawdown_(economics) for more details.
"""
returns = returns.copy()
df_cum = ep.cum_returns(returns, 1.0)
running_max = np.maximum.accumulate(df_cum)
underwater = df_cum / running_max - 1
return get_max_drawdown_underwater(underwater)
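# Worked sketch of the underwater arithmetic (made-up numbers): if cumulative
# wealth moves 1.00 -> 1.10 -> 0.99, the running max is 1.00, 1.10, 1.10 and
# the underwater series is 0.0, 0.0, 0.99 / 1.10 - 1 = -0.10, i.e. a 10%
# drawdown whose peak is day 2 and whose valley is day 3.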
def get_top_drawdowns(returns, top=10):
"""
Finds top drawdowns, sorted by drawdown amount.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
top : int, optional
The number of top drawdowns to find (default 10).
Returns
-------
drawdowns : list
List of drawdown peaks, valleys, and recoveries. See get_max_drawdown.
"""
returns = returns.copy()
df_cum = ep.cum_returns(returns, 1.0)
running_max = np.maximum.accumulate(df_cum)
underwater = df_cum / running_max - 1
drawdowns = []
for _ in range(top):
peak, valley, recovery = get_max_drawdown_underwater(underwater)
# Slice out draw-down period
if not pd.isnull(recovery):
underwater.drop(underwater[peak:recovery].index[1:-1], inplace=True)
else:
# drawdown has not ended yet
underwater = underwater.loc[:peak]
drawdowns.append((peak, valley, recovery))
if (len(returns) == 0) or (len(underwater) == 0) or (np.min(underwater) == 0):
break
return drawdowns
def gen_drawdown_table(returns, top=10):
"""
Places top drawdowns in a table.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
top : int, optional
The number of top drawdowns to find (default 10).
Returns
-------
df_drawdowns : pd.DataFrame
Information about top drawdowns.
"""
df_cum = ep.cum_returns(returns, 1.0)
drawdown_periods = get_top_drawdowns(returns, top=top)
df_drawdowns = pd.DataFrame(
index=list(range(top)),
columns=[
"Net drawdown in %",
"Peak date",
"Valley date",
"Recovery date",
"Duration",
],
)
for i, (peak, valley, recovery) in enumerate(drawdown_periods):
if pd.isnull(recovery):
df_drawdowns.loc[i, "Duration"] = np.nan
else:
df_drawdowns.loc[i, "Duration"] = len(
pd.date_range(peak, recovery, freq="B")
)
df_drawdowns.loc[i, "Peak date"] = peak.to_pydatetime().strftime("%Y-%m-%d")
df_drawdowns.loc[i, "Valley date"] = valley.to_pydatetime().strftime("%Y-%m-%d")
if isinstance(recovery, float):
df_drawdowns.loc[i, "Recovery date"] = recovery
else:
df_drawdowns.loc[i, "Recovery date"] = recovery.to_pydatetime().strftime(
"%Y-%m-%d"
)
df_drawdowns.loc[i, "Net drawdown in %"] = (
(df_cum.loc[peak] - df_cum.loc[valley]) / df_cum.loc[peak]
) * 100
df_drawdowns["Peak date"] = pd.to_datetime(df_drawdowns["Peak date"])
df_drawdowns["Valley date"] = pd.to_datetime(df_drawdowns["Valley date"])
df_drawdowns["Recovery date"] = pd.to_datetime(df_drawdowns["Recovery date"])
return df_drawdowns
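# Usage sketch (hypothetical): rows whose drawdown has not recovered keep NaT
# in "Recovery date" and NaN in "Duration" (business days from peak to
# recovery).
#
#   dd_table = gen_drawdown_table(rets, top=5)
#   dd_table.sort_values("Net drawdown in %", ascending=False)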
def rolling_volatility(returns, rolling_vol_window):
"""
Determines the rolling volatility of a strategy.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
rolling_vol_window : int
Length of rolling window, in days, over which to compute.
Returns
-------
pd.Series
Rolling volatility.
"""
return returns.rolling(rolling_vol_window).std() * np.sqrt(APPROX_BDAYS_PER_YEAR)
def rolling_sharpe(returns, rolling_sharpe_window):
"""
Determines the rolling Sharpe ratio of a strategy.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
rolling_sharpe_window : int
Length of rolling window, in days, over which to compute.
Returns
-------
pd.Series
Rolling Sharpe ratio.
Note
-----
See https://en.wikipedia.org/wiki/Sharpe_ratio for more details.
"""
return (
returns.rolling(rolling_sharpe_window).mean()
/ returns.rolling(rolling_sharpe_window).std()
* np.sqrt(APPROX_BDAYS_PER_YEAR)
)
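# Worked example of the annualization (made-up numbers): with a rolling mean
# daily return of 0.04% and a rolling std of 1%, the rolling Sharpe for that
# window is roughly 0.0004 / 0.01 * sqrt(252) ~= 0.63.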
def simulate_paths(
is_returns, num_days, starting_value=1, num_samples=1000, random_seed=None
):
"""
Generate alternate paths using available values from in-sample returns.
Parameters
----------
is_returns : pandas.core.frame.DataFrame
Non-cumulative in-sample returns.
num_days : int
Number of days to project the probability cone forward.
starting_value : int or float
Starting value of the out of sample period.
num_samples : int
Number of samples to draw from the in-sample daily returns.
Each sample will be an array with length num_days.
A higher number of samples will generate a more accurate
bootstrap cone.
random_seed : int
Seed for the pseudorandom number generator used by the pandas
sample method.
Returns
-------
samples : numpy.ndarray
"""
samples = np.empty((num_samples, num_days))
seed = np.random.RandomState(seed=random_seed)
for i in range(num_samples):
samples[i, :] = is_returns.sample(num_days, replace=True, random_state=seed)
return samples
def summarize_paths(samples, cone_std=(1.0, 1.5, 2.0), starting_value=1.0):
"""
Generate the upper and lower bounds of an n standard deviation
cone of forecasted cumulative returns.
Parameters
----------
samples : numpy.ndarray
Alternative paths, or series of possible outcomes.
cone_std : list of int/float
Number of standard deviations to use in the boundaries of
the cone. If multiple values are passed, cone bounds will
be generated for each value.
Returns
-------
samples : pandas.core.frame.DataFrame
"""
cum_samples = ep.cum_returns(samples.T, starting_value=starting_value).T
cum_mean = cum_samples.mean(axis=0)
cum_std = cum_samples.std(axis=0)
if isinstance(cone_std, (float, int)):
cone_std = [cone_std]
cone_bounds = pd.DataFrame(columns=pd.Index([], dtype="float64"))
for num_std in cone_std:
cone_bounds.loc[:, float(num_std)] = cum_mean + cum_std * num_std
cone_bounds.loc[:, float(-num_std)] = cum_mean - cum_std * num_std
return cone_bounds
def forecast_cone_bootstrap(
is_returns,
num_days,
cone_std=(1.0, 1.5, 2.0),
starting_value=1,
num_samples=1000,
random_seed=None,
):
"""
Determines the upper and lower bounds of an n standard deviation
cone of forecasted cumulative returns. Future cumulative mean and
standard deviation are computed by repeatedly sampling from the
in-sample daily returns (i.e. bootstrap). This cone is non-parametric,
meaning it does not assume that returns are normally distributed.
Parameters
----------
is_returns : pd.Series
In-sample daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
num_days : int
Number of days to project the probability cone forward.
cone_std : int, float, or list of int/float
Number of standard deviations to use in the boundaries of
the cone. If multiple values are passed, cone bounds will
be generated for each value.
starting_value : int or float
Starting value of the out of sample period.
num_samples : int
Number of samples to draw from the in-sample daily returns.
Each sample will be an array with length num_days.
A higher number of samples will generate a more accurate
bootstrap cone.
random_seed : int
Seed for the pseudorandom number generator used by the pandas
sample method.
Returns
-------
pd.DataFrame
Contains upper and lower cone boundaries. Column names are
floats corresponding to the number of standard deviations
above (positive) or below (negative) the projected mean
cumulative returns.
"""
samples = simulate_paths(
is_returns=is_returns,
num_days=num_days,
starting_value=starting_value,
num_samples=num_samples,
random_seed=random_seed,
)
cone_bounds = summarize_paths(
samples=samples, cone_std=cone_std, starting_value=starting_value
)
return cone_bounds
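# Usage sketch (hypothetical): project a 60-day cone from in-sample returns.
# The resulting columns are the floats 1.0, -1.0, 2.0, -2.0, i.e. the upper
# and lower bound for each standard-deviation level.
#
#   cone = forecast_cone_bootstrap(rets, num_days=60, cone_std=(1.0, 2.0),
#                                  num_samples=500, random_seed=42)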
def extract_interesting_date_ranges(returns, periods=None):
"""
Extracts returns based on interesting events. See
gen_date_range_interesting.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
Returns
-------
ranges : OrderedDict
Date ranges, with returns, of all valid events.
"""
if periods is None:
periods = PERIODS
returns_dupe = returns.copy()
returns_dupe.index = returns_dupe.index.map(pd.Timestamp)
ranges = OrderedDict()
for name, (start, end) in periods.items():
try:
period = returns_dupe.loc[start:end]
if len(period) == 0:
continue
ranges[name] = period
except BaseException:
continue
return ranges
plotting.py
#
# Copyright 2018 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import calendar
from collections import OrderedDict
from functools import wraps
import empyrical as ep
import matplotlib
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
import scipy as sp
from matplotlib import figure
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.ticker import FuncFormatter
import os
import seaborn as sns
from . import capacity
from . import pos
from . import timeseries
from . import txn
from . import utils
from .utils import APPROX_BDAYS_PER_MONTH, MM_DISPLAY_UNIT
def customize(func):
"""
Decorator to set plotting context and axes style during function call.
"""
@wraps(func)
def call_w_context(*args, **kwargs):
set_context = kwargs.pop("set_context", True)
if set_context:
with plotting_context(), axes_style():
return func(*args, **kwargs)
else:
return func(*args, **kwargs)
return call_w_context
def plotting_context(context="notebook", font_scale=1.5, rc=None):
"""
Create pyfolio default plotting style context.
Under the hood, calls and returns seaborn.plotting_context() with
some custom settings. Usually you would use it in a with-context.
Parameters
----------
context : str, optional
Name of seaborn context.
font_scale : float, optional
Scale font by factor font_scale.
rc : dict, optional
Config flags.
By default, {'lines.linewidth': 1.5}
is being used and will be added to any
rc passed in, unless explicitly overridden.
Returns
-------
seaborn plotting context
Example
-------
>>> with pyfolio.plotting.plotting_context(font_scale=2):
>>> pyfolio.create_full_tear_sheet(..., set_context=False)
See also
--------
For more information, see seaborn.plotting_context().
"""
if rc is None:
rc = {}
rc_default = {"lines.linewidth": 1.5}
# Add defaults if they do not exist
for name, val in rc_default.items():
rc.setdefault(name, val)
return sns.plotting_context(context=context, font_scale=font_scale, rc=rc)
def axes_style(style="darkgrid", rc=None):
"""
Create pyfolio default axes style context.
Under the hood, calls and returns seaborn.axes_style() with
some custom settings. Usually you would use it in a with-context.
Parameters
----------
style : str, optional
Name of seaborn style.
rc : dict, optional
Config flags.
Returns
-------
seaborn plotting context
Example
-------
>>> with pyfolio.plotting.axes_style(style='whitegrid'):
>>> pyfolio.create_full_tear_sheet(..., set_context=False)
See also
--------
For more information, see seaborn.plotting_context().
"""
if rc is None:
rc = {}
rc_default = {}
# Add defaults if they do not exist
for name, val in rc_default.items():
rc.setdefault(name, val)
return sns.axes_style(style=style, rc=rc)
def plot_monthly_returns_heatmap(returns, ax=None, **kwargs):
"""
Plots a heatmap of returns by month.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to seaborn plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
monthly_ret_table = ep.aggregate_returns(returns, "monthly")
monthly_ret_table = monthly_ret_table.unstack().round(3)
monthly_ret_table.rename(
columns={i: m for i, m in enumerate(calendar.month_abbr)}, inplace=True
)
sns.heatmap(
monthly_ret_table.fillna(0) * 100.0,
annot=True,
annot_kws={"size": 9},
alpha=1.0,
center=0.0,
cbar=False,
cmap=matplotlib.cm.RdYlGn,
ax=ax,
**kwargs,
)
ax.set_ylabel("Year")
ax.set_xlabel("Month")
ax.set_title("Monthly returns (%)")
return ax
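# Usage sketch (hypothetical): any Axes can be passed in; plt.gca() is only
# the fallback. Rendering onto a standalone figure makes it easy to save to
# disk.
#
#   fig, ax = plt.subplots(figsize=(8, 6))
#   plot_monthly_returns_heatmap(rets, ax=ax)
#   fig.savefig("monthly_returns_heatmap.png")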
def plot_annual_returns(returns, ax=None, **kwargs):
"""
Plots a bar graph of returns by year.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
x_axis_formatter = FuncFormatter(utils.percentage)
ax.xaxis.set_major_formatter(FuncFormatter(x_axis_formatter))
ax.tick_params(axis="x", which="major")
ann_ret_df = pd.DataFrame(ep.aggregate_returns(returns, "yearly"))
ax.axvline(
100 * ann_ret_df.values.mean(),
color="red",
linestyle="--",
lw=1,
alpha=0.7,
)
(100 * ann_ret_df.sort_index(ascending=False)).plot(
ax=ax, kind="barh", alpha=0.70, **kwargs
)
ax.axvline(0.0, color="black", linestyle="-", lw=2)
ax.set_ylabel("Year")
ax.set_xlabel("Returns")
ax.set_title("Annual returns")
ax.legend(["Mean"], frameon=True, framealpha=0.5)
return ax
def plot_monthly_returns_dist(returns, ax=None, **kwargs):
"""
Plots a distribution of monthly returns.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
x_axis_formatter = FuncFormatter(utils.percentage)
ax.xaxis.set_major_formatter(FuncFormatter(x_axis_formatter))
ax.tick_params(axis="x", which="major")
monthly_ret_table = ep.aggregate_returns(returns, "monthly")
ax.hist(
100 * monthly_ret_table,
color="steelblue",
alpha=0.80,
bins=20,
**kwargs,
)
ax.axvline(
100 * monthly_ret_table.mean(),
color="red",
linestyle="--",
lw=1,
alpha=1.0,
)
ax.axvline(0.0, color="black", linestyle="-", lw=1, alpha=0.75)
ax.legend(["Mean"], frameon=True, framealpha=0.5)
ax.set_ylabel("Number of months")
ax.set_xlabel("Returns")
ax.set_title("Distribution of monthly returns")
return ax
def plot_holdings(returns, positions, legend_loc="best", ax=None, **kwargs):
"""
Plots the total number of stocks with an active position, either short
or long. Displays daily total, daily average per month, and
all-time daily average.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
positions : pd.DataFrame, optional
Daily net position values.
- See full explanation in tears.create_full_tear_sheet.
legend_loc : matplotlib.loc, optional
The location of the legend on the plot.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
positions = positions.copy().drop("cash", axis="columns")
df_holdings = positions.replace(0, np.nan).count(axis=1)
df_holdings_by_month = df_holdings.resample("1M").mean()
df_holdings.plot(color="steelblue", alpha=0.6, lw=0.5, ax=ax, **kwargs)
df_holdings_by_month.plot(color="orangered", lw=2, ax=ax, **kwargs)
ax.axhline(df_holdings.values.mean(), color="steelblue", ls="--", lw=3)
ax.set_xlim((returns.index[0], returns.index[-1]))
leg = ax.legend(
[
"Daily holdings",
"Average daily holdings, by month",
"Average daily holdings, overall",
],
loc=legend_loc,
frameon=True,
framealpha=0.5,
)
leg.get_frame().set_edgecolor("black")
ax.set_title("Total holdings")
ax.set_ylabel("Holdings")
ax.set_xlabel("")
return ax
def plot_long_short_holdings(
returns, positions, legend_loc="upper left", ax=None, **kwargs
):
"""
Plots the total number of stocks with an active position, breaking out
short and long into transparent filled regions.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
positions : pd.DataFrame, optional
Daily net position values.
- See full explanation in tears.create_full_tear_sheet.
legend_loc : matplotlib.loc, optional
The location of the legend on the plot.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
positions = positions.drop("cash", axis="columns")
positions = positions.replace(0, np.nan)
df_longs = positions[positions > 0].count(axis=1)
df_shorts = positions[positions < 0].count(axis=1)
lf = ax.fill_between(
df_longs.index, 0, df_longs.values, color="g", alpha=0.5, lw=2.0
)
sf = ax.fill_between(
df_shorts.index, 0, df_shorts.values, color="r", alpha=0.5, lw=2.0
)
bf = patches.Rectangle([0, 0], 1, 1, color="darkgoldenrod")
leg = ax.legend(
[lf, sf, bf],
[
"Long (max: %s, min: %s)" % (df_longs.max(), df_longs.min()),
"Short (max: %s, min: %s)" % (df_shorts.max(), df_shorts.min()),
"Overlap",
],
loc=legend_loc,
frameon=True,
framealpha=0.5,
)
leg.get_frame().set_edgecolor("black")
ax.set_xlim((returns.index[0], returns.index[-1]))
ax.set_title("Long and short holdings")
ax.set_ylabel("Holdings")
ax.set_xlabel("")
return ax
def plot_drawdown_periods(returns, top=10, ax=None, **kwargs):
"""
Plots cumulative returns highlighting top drawdown periods.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
top : int, optional
Number of top drawdown periods to plot (default 10).
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
y_axis_formatter = FuncFormatter(utils.two_dec_places)
ax.yaxis.set_major_formatter(FuncFormatter(y_axis_formatter))
df_cum_rets = ep.cum_returns(returns, starting_value=1.0)
df_drawdowns = timeseries.gen_drawdown_table(returns, top=top)
df_cum_rets.plot(ax=ax, **kwargs)
lim = ax.get_ylim()
colors = sns.cubehelix_palette(len(df_drawdowns))[::-1]
for i, (peak, recovery) in df_drawdowns[["Peak date", "Recovery date"]].iterrows():
if pd.isnull(recovery):
recovery = returns.index[-1]
ax.fill_between((peak, recovery), lim[0], lim[1], alpha=0.4, color=colors[i])
ax.set_ylim(lim)
ax.set_title("Top %i drawdown periods" % top)
ax.set_ylabel("Cumulative returns")
ax.legend(["Portfolio"], loc="upper left", frameon=True, framealpha=0.5)
ax.set_xlabel("")
return ax
def plot_drawdown_underwater(returns, ax=None, **kwargs):
"""
Plots how far underwater returns are over time, or plots current
drawdown vs. date.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
y_axis_formatter = FuncFormatter(utils.percentage)
ax.yaxis.set_major_formatter(FuncFormatter(y_axis_formatter))
df_cum_rets = ep.cum_returns(returns, starting_value=1.0)
running_max = np.maximum.accumulate(df_cum_rets)
underwater = -100 * ((running_max - df_cum_rets) / running_max)
underwater.plot(ax=ax, kind="area", color="salmon", alpha=0.7, **kwargs)
ax.set_ylabel("Drawdown")
ax.set_title("Underwater plot")
ax.set_xlabel("")
return ax
def plot_perf_stats(returns, factor_returns, ax=None):
"""
Create box plot of some performance metrics of the strategy.
The width of the box whiskers is determined by a bootstrap.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.Series
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- This is in the same style as returns.
ax : matplotlib.Axes, optional
Axes upon which to plot.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
bootstrap_values = timeseries.perf_stats_bootstrap(
returns, factor_returns, return_stats=False
)
bootstrap_values = bootstrap_values.drop("Kurtosis", axis="columns")
sns.boxplot(data=bootstrap_values, orient="h", ax=ax)
return ax
STAT_FUNCS_PCT = [
"Annual return",
"Cumulative returns",
"Annual volatility",
"Max drawdown",
"Daily value at risk",
"Daily turnover",
]
def show_perf_stats(
returns,
factor_returns=None,
positions=None,
transactions=None,
turnover_denom="AGB",
live_start_date=None,
bootstrap=False,
header_rows=None,
):
"""
Prints some performance metrics of the strategy.
- Shows amount of time the strategy has been run in backtest and
out-of-sample (in live trading).
- Shows Omega ratio, max drawdown, Calmar ratio, annual return,
stability, Sharpe ratio, annual volatility, alpha, and beta.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.Series, optional
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- This is in the same style as returns.
positions : pd.DataFrame, optional
Daily net position values.
- See full explanation in create_full_tear_sheet.
transactions : pd.DataFrame, optional
Prices and amounts of executed trades. One row per trade.
- See full explanation in tears.create_full_tear_sheet
turnover_denom : str, optional
Either AGB or portfolio_value, default AGB.
- See full explanation in txn.get_turnover.
live_start_date : datetime, optional
The point in time when the strategy began live trading, after
its backtest period.
bootstrap : boolean, optional
Whether to perform bootstrap analysis for the performance
metrics.
- For more information, see timeseries.perf_stats_bootstrap
header_rows : dict or OrderedDict, optional
Extra rows to display at the top of the displayed table.
"""
if bootstrap:
perf_func = timeseries.perf_stats_bootstrap
else:
perf_func = timeseries.perf_stats
perf_stats_all = perf_func(
returns,
factor_returns=factor_returns,
positions=positions,
transactions=transactions,
turnover_denom=turnover_denom,
)
date_rows = OrderedDict()
if len(returns.index) > 0:
date_rows["Start date"] = returns.index[0].strftime("%Y-%m-%d")
date_rows["End date"] = returns.index[-1].strftime("%Y-%m-%d")
if live_start_date is not None:
live_start_date = ep.utils.get_utc_timestamp(live_start_date)
returns_is = returns[returns.index < live_start_date]
returns_oos = returns[returns.index >= live_start_date]
positions_is = None
positions_oos = None
transactions_is = None
transactions_oos = None
if positions is not None:
positions_is = positions[positions.index < live_start_date]
positions_oos = positions[positions.index >= live_start_date]
if transactions is not None:
transactions_is = transactions[(transactions.index < live_start_date)]
transactions_oos = transactions[(transactions.index > live_start_date)]
perf_stats_is = perf_func(
returns_is,
factor_returns=factor_returns,
positions=positions_is,
transactions=transactions_is,
turnover_denom=turnover_denom,
)
perf_stats_oos = perf_func(
returns_oos,
factor_returns=factor_returns,
positions=positions_oos,
transactions=transactions_oos,
turnover_denom=turnover_denom,
)
if len(returns.index) > 0:
date_rows["In-sample months"] = int(
len(returns_is) / APPROX_BDAYS_PER_MONTH
)
date_rows["Out-of-sample months"] = int(
len(returns_oos) / APPROX_BDAYS_PER_MONTH
)
perf_stats = pd.concat(
OrderedDict(
[
("In-sample", perf_stats_is),
("Out-of-sample", perf_stats_oos),
("All", perf_stats_all),
]
),
axis=1,
)
else:
if len(returns.index) > 0:
date_rows["Total months"] = int(len(returns) / APPROX_BDAYS_PER_MONTH)
perf_stats = pd.DataFrame(perf_stats_all, columns=["Backtest"])
for column in perf_stats.columns:
for stat, value in perf_stats[column].items():
if stat in STAT_FUNCS_PCT:
perf_stats.loc[stat, column] = str(np.round(value * 100, 3)) + "%"
if header_rows is None:
header_rows = date_rows
else:
header_rows = OrderedDict(header_rows)
header_rows.update(date_rows)
utils.print_table(
perf_stats,
float_format="{0:.2f}".format,
header_rows=header_rows,
)
def plot_returns(returns, live_start_date=None, ax=None):
"""
Plots raw returns over time.
Backtest returns are in green, and out-of-sample (live trading)
returns are in red.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
live_start_date : datetime, optional
The date when the strategy began live trading, after
its backtest period. This date should be normalized.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
ax.set_label("")
ax.set_ylabel("Returns")
if live_start_date is not None:
live_start_date = ep.utils.get_utc_timestamp(live_start_date)
is_returns = returns.loc[returns.index < live_start_date]
oos_returns = returns.loc[returns.index >= live_start_date]
is_returns.plot(ax=ax, color="g")
oos_returns.plot(ax=ax, color="r")
else:
returns.plot(ax=ax, color="g")
return ax
def plot_rolling_returns(
returns,
factor_returns=None,
live_start_date=None,
logy=False,
cone_std=None,
legend_loc="best",
volatility_match=False,
cone_function=timeseries.forecast_cone_bootstrap,
ax=None,
**kwargs,
):
"""
Plots cumulative rolling returns versus a benchmark's.
Backtest returns are in green, and out-of-sample (live trading)
returns are in red.
Additionally, a non-parametric cone plot may be added to the
out-of-sample returns region.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.Series, optional
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- This is in the same style as returns.
live_start_date : datetime, optional
The date when the strategy began live trading, after
its backtest period. This date should be normalized.
logy : bool, optional
Whether to log-scale the y-axis.
cone_std : float, or tuple, optional
If float, the standard deviation to use for the cone plots.
If tuple, the standard deviation values to use for the cone plots.
- See timeseries.forecast_cone_bounds for more details.
legend_loc : matplotlib.loc, optional
The location of the legend on the plot.
volatility_match : bool, optional
Whether to normalize the volatility of the returns to those of the
benchmark returns. This helps compare strategies with different
volatilities. Requires passing of benchmark_rets.
cone_function : function, optional
Function to use when generating forecast probability cone.
The function signature must follow the form:
def cone(in_sample_returns (pd.Series),
days_to_project_forward (int),
cone_std= (float, or tuple),
starting_value= (int, or float))
See timeseries.forecast_cone_bootstrap for an example.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
ax.set_xlabel("")
ax.set_ylabel("Cumulative returns")
ax.set_yscale("log" if logy else "linear")
if volatility_match and factor_returns is None:
raise ValueError("volatility_match requires passing of factor_returns.")
elif volatility_match and factor_returns is not None:
bmark_vol = factor_returns.loc[returns.index].std()
returns = (returns / returns.std()) * bmark_vol
cum_rets = ep.cum_returns(returns, 1.0)
y_axis_formatter = FuncFormatter(utils.two_dec_places)
ax.yaxis.set_major_formatter(FuncFormatter(y_axis_formatter))
if factor_returns is not None:
cum_factor_returns = ep.cum_returns(factor_returns[cum_rets.index], 1.0)
cum_factor_returns.plot(
lw=2,
color="gray",
label=factor_returns.name,
alpha=0.60,
ax=ax,
**kwargs,
)
if live_start_date is not None:
live_start_date = ep.utils.get_utc_timestamp(live_start_date)
is_cum_returns = cum_rets.loc[cum_rets.index < live_start_date]
oos_cum_returns = cum_rets.loc[cum_rets.index >= live_start_date]
else:
is_cum_returns = cum_rets
oos_cum_returns = pd.Series([], dtype="float64")
is_cum_returns.plot(
lw=2, color="forestgreen", alpha=0.6, label="Backtest", ax=ax, **kwargs
)
if len(oos_cum_returns) > 0:
oos_cum_returns.plot(
lw=2, color="red", alpha=0.6, label="Live", ax=ax, **kwargs
)
if cone_std is not None:
if isinstance(cone_std, (float, int)):
cone_std = [cone_std]
is_returns = returns.loc[returns.index < live_start_date]
cone_bounds = cone_function(
is_returns,
len(oos_cum_returns),
cone_std=cone_std,
starting_value=is_cum_returns.iloc[-1],
)
cone_bounds = cone_bounds.set_index(oos_cum_returns.index)
for std in cone_std:
ax.fill_between(
cone_bounds.index,
cone_bounds[float(std)],
cone_bounds[float(-std)],
color="steelblue",
alpha=0.5,
)
if legend_loc is not None:
ax.legend(loc=legend_loc, frameon=True, framealpha=0.5)
ax.axhline(1.0, linestyle="--", color="black", lw=1)
return ax
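# Usage sketch (hypothetical dates and series): backtest vs. live cumulative
# returns with a two-band bootstrap cone over the out-of-sample region.
#
#   fig, ax = plt.subplots()
#   plot_rolling_returns(rets, factor_returns=benchmark_rets,
#                        live_start_date="2021-01-01",
#                        cone_std=(1.0, 2.0), ax=ax)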
def plot_rolling_beta(returns, factor_returns, legend_loc="best", ax=None, **kwargs):
"""
Plots the rolling 6-month and 12-month beta versus date.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.Series
Daily noncumulative returns of the benchmark factor to which betas are
computed. Usually a benchmark such as market returns.
- This is in the same style as returns.
legend_loc : matplotlib.loc, optional
The location of the legend on the plot.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
y_axis_formatter = FuncFormatter(utils.two_dec_places)
ax.yaxis.set_major_formatter(FuncFormatter(y_axis_formatter))
ax.set_title("Rolling portfolio beta to " + str(factor_returns.name))
ax.set_ylabel("Beta")
rb_1 = timeseries.rolling_beta(
returns, factor_returns, rolling_window=APPROX_BDAYS_PER_MONTH * 6
)
rb_1.plot(color="steelblue", lw=2, alpha=0.6, ax=ax, **kwargs)
rb_2 = timeseries.rolling_beta(
returns, factor_returns, rolling_window=APPROX_BDAYS_PER_MONTH * 12
)
rb_2.plot(color="grey", lw=2, alpha=0.4, ax=ax, **kwargs)
ax.axhline(rb_1.mean(), color="steelblue", linestyle="--", lw=2)
ax.axhline(1.0, color="black", linestyle="--", lw=1)
ax.set_xlabel("")
ax.legend(
["6-mo", "12-mo", "6-mo Average"],
loc=legend_loc,
frameon=True,
framealpha=0.5,
)
# ax.set_ylim((-0.5, 1.5))
return ax
def plot_rolling_volatility(
returns,
factor_returns=None,
rolling_window=APPROX_BDAYS_PER_MONTH * 6,
legend_loc="best",
ax=None,
**kwargs,
):
"""
Plots the rolling volatility versus date.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.Series, optional
Daily noncumulative returns of the benchmark factor for which the
benchmark rolling volatility is computed. Usually a benchmark such
as market returns.
- This is in the same style as returns.
rolling_window : int, optional
The days window over which to compute the volatility.
legend_loc : matplotlib.loc, optional
The location of the legend on the plot.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
y_axis_formatter = FuncFormatter(utils.two_dec_places)
ax.yaxis.set_major_formatter(FuncFormatter(y_axis_formatter))
rolling_vol_ts = timeseries.rolling_volatility(returns, rolling_window)
rolling_vol_ts.plot(alpha=0.7, lw=2, color="orangered", ax=ax, **kwargs)
if factor_returns is not None:
rolling_vol_ts_factor = timeseries.rolling_volatility(
factor_returns, rolling_window
)
rolling_vol_ts_factor.plot(alpha=0.7, lw=2, color="grey", ax=ax, **kwargs)
ax.set_title("Rolling volatility (6-month)")
ax.axhline(rolling_vol_ts.mean(), color="steelblue", linestyle="--", lw=2)
ax.axhline(0.0, color="black", linestyle="--", lw=1, zorder=2)
ax.set_ylabel("Volatility")
ax.set_xlabel("")
if factor_returns is None:
ax.legend(
["Volatility", "Average volatility"],
loc=legend_loc,
frameon=True,
framealpha=0.5,
)
else:
ax.legend(
["Volatility", "Benchmark volatility", "Average volatility"],
loc=legend_loc,
frameon=True,
framealpha=0.5,
)
return ax
def plot_rolling_sharpe(
returns,
factor_returns=None,
rolling_window=APPROX_BDAYS_PER_MONTH * 6,
legend_loc="best",
ax=None,
**kwargs,
):
"""
Plots the rolling Sharpe ratio versus date.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
factor_returns : pd.Series, optional
Daily noncumulative returns of the benchmark factor for
which the benchmark rolling Sharpe is computed. Usually
a benchmark such as market returns.
- This is in the same style as returns.
rolling_window : int, optional
The days window over which to compute the Sharpe ratio.
legend_loc : matplotlib.loc, optional
The location of the legend on the plot.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
y_axis_formatter = FuncFormatter(utils.two_dec_places)
ax.yaxis.set_major_formatter(FuncFormatter(y_axis_formatter))
rolling_sharpe_ts = timeseries.rolling_sharpe(returns, rolling_window)
rolling_sharpe_ts.plot(alpha=0.7, lw=2, color="orangered", ax=ax, **kwargs)
if factor_returns is not None:
rolling_sharpe_ts_factor = timeseries.rolling_sharpe(
factor_returns, rolling_window
)
rolling_sharpe_ts_factor.plot(alpha=0.7, lw=2, color="grey", ax=ax, **kwargs)
ax.set_title("Rolling Sharpe ratio (6-month)")
ax.axhline(rolling_sharpe_ts.mean(), color="steelblue", linestyle="--", lw=2)
ax.axhline(0.0, color="black", linestyle="--", lw=1, zorder=2)
ax.set_ylabel("Sharpe ratio")
ax.set_xlabel("")
if factor_returns is None:
ax.legend(["Sharpe", "Average"], loc=legend_loc, frameon=True, framealpha=0.5)
else:
ax.legend(
["Sharpe", "Benchmark Sharpe", "Average"],
loc=legend_loc,
frameon=True,
framealpha=0.5,
)
return ax
def plot_gross_leverage(returns, positions, ax=None, **kwargs):
"""
Plots gross leverage versus date.
Gross leverage is the sum of absolute long and short exposure
divided by net asset value.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
positions : pd.DataFrame
Daily net position values.
- See full explanation in create_full_tear_sheet.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
gl = timeseries.gross_lev(positions)
gl.plot(lw=0.5, color="limegreen", legend=False, ax=ax, **kwargs)
ax.axhline(gl.mean(), color="g", linestyle="--", lw=3)
ax.set_title("Gross leverage")
ax.set_ylabel("Gross leverage")
ax.set_xlabel("")
return ax
def plot_exposures(returns, positions, ax=None, **kwargs):
"""
Plots a cake chart of the long and short exposure.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
positions_alloc : pd.DataFrame
Portfolio allocation of positions. See
pos.get_percent_alloc.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
pos_no_cash = positions.drop("cash", axis=1)
l_exp = pos_no_cash[pos_no_cash > 0].sum(axis=1) / positions.sum(axis=1)
s_exp = pos_no_cash[pos_no_cash < 0].sum(axis=1) / positions.sum(axis=1)
net_exp = pos_no_cash.sum(axis=1) / positions.sum(axis=1)
ax.fill_between(
l_exp.index, 0, l_exp.values, label="Long", color="green", alpha=0.5
)
ax.fill_between(s_exp.index, 0, s_exp.values, label="Short", color="red", alpha=0.5)
ax.plot(
net_exp.index,
net_exp.values,
label="Net",
color="black",
linestyle="dotted",
)
ax.set_xlim((returns.index[0], returns.index[-1]))
ax.set_title("Exposure")
ax.set_ylabel("Exposure")
ax.legend(loc="lower left", frameon=True, framealpha=0.5)
ax.set_xlabel("")
return ax
def show_and_plot_top_positions(
returns,
positions_alloc,
show_and_plot=2,
hide_positions=False,
legend_loc="real_best",
ax=None,
**kwargs,
):
"""
Prints and/or plots the exposures of the top 10 held positions of
all time.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
positions_alloc : pd.DataFrame
Portfolio allocation of positions. See pos.get_percent_alloc.
show_and_plot : int, optional
By default, this is 2, and both prints and plots.
If this is 0, it will only plot; if 1, it will only print.
hide_positions : bool, optional
If True, will not output any symbol names.
legend_loc : matplotlib.loc, optional
The location of the legend on the plot.
By default, the legend will display below the plot.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes, conditional
The axes that were plotted on.
"""
positions_alloc = positions_alloc.copy()
positions_alloc.columns = positions_alloc.columns.map(utils.format_asset)
df_top_long, df_top_short, df_top_abs = pos.get_top_long_short_abs(positions_alloc)
if show_and_plot == 1 or show_and_plot == 2:
utils.print_table(
pd.DataFrame(df_top_long * 100, columns=["max"]),
float_format="{0:.2f}%".format,
name="Top 10 long positions of all time",
)
utils.print_table(
pd.DataFrame(df_top_short * 100, columns=["max"]),
float_format="{0:.2f}%".format,
name="Top 10 short positions of all time",
)
utils.print_table(
pd.DataFrame(df_top_abs * 100, columns=["max"]),
float_format="{0:.2f}%".format,
name="Top 10 positions of all time",
)
if show_and_plot == 0 or show_and_plot == 2:
if ax is None:
ax = plt.gca()
positions_alloc[df_top_abs.index].plot(
title="Portfolio allocation over time, only top 10 holdings",
alpha=0.5,
ax=ax,
**kwargs,
)
# Place legend below plot, shrink plot by 20%
if legend_loc == "real_best":
box = ax.get_position()
ax.set_position(
[
box.x0,
box.y0 + box.height * 0.1,
box.width,
box.height * 0.9,
]
)
# Put a legend below current axis
ax.legend(
loc="upper center",
frameon=True,
framealpha=0.5,
bbox_to_anchor=(0.5, -0.14),
ncol=5,
)
else:
ax.legend(loc=legend_loc)
ax.set_xlim((returns.index[0], returns.index[-1]))
ax.set_ylabel("Exposure by holding")
if hide_positions:
ax.legend_.remove()
return ax
def plot_max_median_position_concentration(positions, ax=None, **kwargs):
"""
Plots the max and median of long and short position concentrations
over time.
Parameters
----------
positions : pd.DataFrame
The positions that the strategy takes over time.
ax : matplotlib.Axes, optional
Axes upon which to plot.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
alloc_summary = pos.get_max_median_position_concentration(positions)
colors = ["mediumblue", "steelblue", "tomato", "firebrick"]
alloc_summary.plot(linewidth=1, color=colors, alpha=0.6, ax=ax)
ax.legend(loc="center left", frameon=True, framealpha=0.5)
ax.set_ylabel("Exposure")
ax.set_title("Long/short max and median position concentration")
return ax
def plot_sector_allocations(returns, sector_alloc, ax=None, **kwargs):
"""
Plots the sector exposures of the portfolio over time.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
sector_alloc : pd.DataFrame
Portfolio allocation of positions. See pos.get_sector_alloc.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
sector_alloc.plot(title="Sector allocation over time", alpha=0.5, ax=ax, **kwargs)
box = ax.get_position()
ax.set_position([box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9])
# Put a legend below current axis
ax.legend(
loc="upper center",
frameon=True,
framealpha=0.5,
bbox_to_anchor=(0.5, -0.14),
ncol=5,
)
ax.set_xlim((sector_alloc.index[0], sector_alloc.index[-1]))
ax.set_ylabel("Exposure by sector")
ax.set_xlabel("")
return ax
def plot_return_quantiles(returns, live_start_date=None, ax=None, **kwargs):
"""
Creates a box plot of daily, weekly, and monthly return
distributions.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
live_start_date : datetime, optional
The point in time when the strategy began live trading, after
its backtest period.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to seaborn plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
is_returns = (
returns
if live_start_date is None
else returns.loc[returns.index < live_start_date]
)
is_weekly = ep.aggregate_returns(is_returns, "weekly")
is_monthly = ep.aggregate_returns(is_returns, "monthly")
sns.boxplot(
data=[is_returns, is_weekly, is_monthly],
palette=["#4c72B0", "#55A868", "#CCB974"],
ax=ax,
**kwargs,
)
if live_start_date is not None:
oos_returns = returns.loc[returns.index >= live_start_date]
oos_weekly = ep.aggregate_returns(oos_returns, "weekly")
oos_monthly = ep.aggregate_returns(oos_returns, "monthly")
sns.swarmplot(
data=[oos_returns, oos_weekly, oos_monthly],
ax=ax,
palette="dark:red",
marker="d",
**kwargs,
)
red_dots = matplotlib.lines.Line2D(
[],
[],
color="red",
marker="d",
label="Out-of-sample data",
linestyle="",
)
ax.legend(handles=[red_dots], frameon=True, framealpha=0.5)
ax.set_xticklabels(["Daily", "Weekly", "Monthly"])
ax.set_title("Return quantiles")
return ax
def plot_turnover(
returns,
transactions,
positions,
turnover_denom="AGB",
legend_loc="best",
ax=None,
**kwargs,
):
"""
Plots turnover vs. date.
Turnover is the number of shares traded for a period as a fraction
of total shares.
Displays daily total, daily average per month, and all-time daily
average.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in tears.create_full_tear_sheet.
positions : pd.DataFrame
Daily net position values.
- See full explanation in tears.create_full_tear_sheet.
turnover_denom : str, optional
Either AGB or portfolio_value, default AGB.
- See full explanation in txn.get_turnover.
legend_loc : matplotlib.loc, optional
The location of the legend on the plot.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
y_axis_formatter = FuncFormatter(utils.two_dec_places)
ax.yaxis.set_major_formatter(FuncFormatter(y_axis_formatter))
df_turnover = txn.get_turnover(positions, transactions, turnover_denom)
df_turnover_by_month = df_turnover.resample("M").mean()
df_turnover.plot(color="steelblue", alpha=1.0, lw=0.5, ax=ax, **kwargs)
df_turnover_by_month.plot(color="orangered", alpha=0.5, lw=2, ax=ax, **kwargs)
ax.axhline(df_turnover.mean(), color="steelblue", linestyle="--", lw=3, alpha=1.0)
ax.legend(
[
"Daily turnover",
"Average daily turnover, by month",
"Average daily turnover, net",
],
loc=legend_loc,
frameon=True,
framealpha=0.5,
)
ax.set_title("Daily turnover")
ax.set_xlim((returns.index[0], returns.index[-1]))
ax.set_ylim((0, 2))
ax.set_ylabel("Turnover")
ax.set_xlabel("")
return ax
def plot_slippage_sweep(
returns,
positions,
transactions,
slippage_params=(3, 8, 10, 12, 15, 20, 50),
ax=None,
**kwargs,
):
"""
Plots equity curves at different per-dollar slippage assumptions.
Parameters
----------
returns : pd.Series
Timeseries of portfolio returns to be adjusted for various
degrees of slippage.
positions : pd.DataFrame
Daily net position values.
- See full explanation in tears.create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in tears.create_full_tear_sheet.
slippage_params: tuple
Slippage parameters to apply to the return time series (in
basis points).
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to seaborn plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
slippage_sweep = pd.DataFrame()
for bps in slippage_params:
adj_returns = txn.adjust_returns_for_slippage(
returns, positions, transactions, bps
)
label = str(bps) + " bps"
slippage_sweep[label] = ep.cum_returns(adj_returns, 1)
slippage_sweep.plot(alpha=1.0, lw=0.5, ax=ax)
ax.set_title("Cumulative returns given additional per-dollar slippage")
ax.set_ylabel("")
ax.legend(loc="center left", frameon=True, framealpha=0.5)
return ax
def plot_slippage_sensitivity(returns, positions, transactions, ax=None, **kwargs):
"""
Plots curve relating per-dollar slippage to average annual returns.
Parameters
----------
returns : pd.Series
Timeseries of portfolio returns to be adjusted for various
degrees of slippage.
positions : pd.DataFrame
Daily net position values.
- See full explanation in tears.create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in tears.create_full_tear_sheet.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to seaborn plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
avg_returns_given_slippage = pd.Series(dtype="float64")
for bps in range(1, 100):
adj_returns = txn.adjust_returns_for_slippage(
returns, positions, transactions, bps
)
avg_returns = ep.annual_return(adj_returns)
avg_returns_given_slippage.loc[bps] = avg_returns
avg_returns_given_slippage.plot(alpha=1.0, lw=2, ax=ax)
ax.set_title("Average annual returns given additional per-dollar slippage")
ax.set_xticks(np.arange(0, 100, 10))
ax.set_ylabel("Average annual return")
ax.set_xlabel("Per-dollar slippage (bps)")
return ax
def plot_capacity_sweep(
returns,
transactions,
market_data,
bt_starting_capital,
min_pv=100000,
max_pv=300000000,
step_size=1000000,
ax=None,
):
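# Sweeps starting capital bases from min_pv to max_pv in steps of step_size,
# applies a slippage penalty to the backtest returns for each base via
# capacity.apply_slippage_penalty, and plots the resulting Sharpe ratio,
# stopping the sweep once the Sharpe ratio drops below -1.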
txn_daily_w_bar = capacity.daily_txns_with_bar_data(transactions, market_data)
capital_base_sweep = pd.Series(dtype="float64")
for start_pv in range(min_pv, max_pv, step_size):
adj_ret = capacity.apply_slippage_penalty(
returns, txn_daily_w_bar, start_pv, bt_starting_capital
)
sharpe = ep.sharpe_ratio(adj_ret)
if sharpe < -1:
break
capital_base_sweep.loc[start_pv] = sharpe
capital_base_sweep.index = capital_base_sweep.index / MM_DISPLAY_UNIT
if ax is None:
ax = plt.gca()
capital_base_sweep.plot(ax=ax)
ax.set_xlabel("Capital base ($mm)")
ax.set_ylabel("Sharpe ratio")
ax.set_title("Capital base performance sweep")
return ax
def plot_daily_turnover_hist(
transactions, positions, turnover_denom="AGB", ax=None, **kwargs
):
"""
Plots a histogram of daily turnover rates.
Parameters
----------
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in tears.create_full_tear_sheet.
positions : pd.DataFrame
Daily net position values.
- See full explanation in tears.create_full_tear_sheet.
turnover_denom : str, optional
Either AGB or portfolio_value, default AGB.
- See full explanation in txn.get_turnover.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to seaborn plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
turnover = txn.get_turnover(positions, transactions, turnover_denom)
sns.histplot(turnover, ax=ax, **kwargs)
ax.set_title("Distribution of daily turnover rates")
ax.set_xlabel("Turnover rate")
return ax
def plot_daily_volume(returns, transactions, ax=None, **kwargs):
"""
Plots trading volume per day vs. date.
Also displays all-time daily average.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in tears.create_full_tear_sheet.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
daily_txn = txn.get_txn_vol(transactions)
daily_txn.txn_shares.plot(alpha=1.0, lw=0.5, ax=ax, **kwargs)
ax.axhline(
daily_txn.txn_shares.mean(),
color="steelblue",
linestyle="--",
lw=3,
alpha=1.0,
)
ax.set_title("Daily trading volume")
ax.set_xlim((returns.index[0], returns.index[-1]))
ax.set_ylabel("Amount of shares traded")
ax.set_xlabel("")
return ax
def plot_txn_time_hist(
transactions, bin_minutes=5, tz="America/New_York", ax=None, **kwargs
):
"""
Plots a histogram of transaction times, binning the times into
buckets of a given duration.
Parameters
----------
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in tears.create_full_tear_sheet.
bin_minutes : float, optional
Sizes of the bins in minutes, defaults to 5 minutes.
tz : str, optional
Time zone to plot against. Note that if the specified
zone does not apply daylight savings, the distribution
may be partially offset.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.gca()
txn_time = transactions.copy()
txn_time.index = txn_time.index.tz_convert(pytz.timezone(tz))
txn_time.index = txn_time.index.map(lambda x: x.hour * 60 + x.minute)
txn_time["trade_value"] = (txn_time.amount * txn_time.price).abs()
txn_time = (
txn_time.groupby(level=0).sum(numeric_only=True).reindex(index=range(570, 961))
)
txn_time.index = (txn_time.index / bin_minutes).astype(int) * bin_minutes
txn_time = txn_time.groupby(level=0).sum(numeric_only=True)
txn_time["time_str"] = txn_time.index.map(
lambda x: str(datetime.time(int(x / 60), x % 60))[:-3]
)
trade_value_sum = txn_time.trade_value.sum()
txn_time.trade_value = txn_time.trade_value.fillna(0) / trade_value_sum
ax.bar(txn_time.index, txn_time.trade_value, width=bin_minutes, **kwargs)
ax.set_xlim(570, 960)
ax.set_xticks(txn_time.index[:: int(30 / bin_minutes)])
ax.set_xticklabels(txn_time.time_str[:: int(30 / bin_minutes)])
ax.set_title("Transaction time distribution")
ax.set_ylabel("Proportion")
ax.set_xlabel("")
return ax
def show_worst_drawdown_periods(returns, top=5):
"""
Prints information about the worst drawdown periods.
Prints peak dates, valley dates, recovery dates, and net
drawdowns.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
top : int, optional
Amount of top drawdowns periods to plot (default 5).
"""
drawdown_df = timeseries.gen_drawdown_table(returns, top=top)
utils.print_table(
drawdown_df.sort_values("Net drawdown in %", ascending=False),
name="Worst drawdown periods",
float_format="{0:.2f}".format,
)
def plot_monthly_returns_timeseries(returns, ax=None, **kwargs):
"""
Plots monthly returns as a timeseries.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in tears.create_full_tear_sheet.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Passed to seaborn plotting function.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
def cumulate_returns(x):
# Use .iloc for positional access; Series[-1] is deprecated on datetime-indexed data in newer pandas
return ep.cum_returns(x).iloc[-1]
if ax is None:
ax = plt.gca()
monthly_rets = returns.resample("M").apply(lambda x: cumulate_returns(x))
monthly_rets = monthly_rets.to_period()
sns.barplot(x=monthly_rets.index, y=monthly_rets.values, color="steelblue", ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)
# only show x-labels on year boundary
xticks_coord = []
xticks_label = []
count = 0
for i in monthly_rets.index:
if i.month == 1:
xticks_label.append(i)
xticks_coord.append(count)
# plot yearly boundary line
ax.axvline(count, color="gray", ls="--", alpha=0.3)
count += 1
ax.axhline(0.0, color="darkgray", ls="-")
ax.set_xticks(xticks_coord)
ax.set_xticklabels(xticks_label)
return ax
def plot_round_trip_lifetimes(round_trips, disp_amount=16, lsize=18, ax=None):
"""
Plots timespans and directions of a sample of round trip trades.
Parameters
----------
round_trips : pd.DataFrame
DataFrame with one row per round trip trade.
- See full explanation in round_trips.extract_round_trips
disp_amount : int, optional
Number of symbols to sample for display (default 16).
lsize : int, optional
Line width used for the round trip bars.
ax : matplotlib.Axes, optional
Axes upon which to plot.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
if ax is None:
ax = plt.subplot()
symbols_sample = round_trips.symbol.unique()
np.random.seed(1)
sample = np.random.choice(
round_trips.symbol.unique(),
replace=False,
size=min(disp_amount, len(symbols_sample)),
)
sample_round_trips = round_trips[round_trips.symbol.isin(sample)]
symbol_idx = pd.Series(np.arange(len(sample)), index=sample)
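# Draw one horizontal bar per round trip, spanning its open and close dates;
# blue marks long trades, red marks short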
for symbol, sym_round_trips in sample_round_trips.groupby("symbol"):
for _, row in sym_round_trips.iterrows():
c = "b" if row.long else "r"
y_ix = symbol_idx[symbol] + 0.05
ax.plot(
[row["open_dt"], row["close_dt"]],
[y_ix, y_ix],
color=c,
linewidth=lsize,
solid_capstyle="butt",
)
ax.set_yticks(range(len(sample)))
ax.set_yticklabels([utils.format_asset(s) for s in sample])
ax.set_ylim((-0.5, min(len(sample), disp_amount) - 0.5))
blue = patches.Rectangle([0, 0], 1, 1, color="b", label="Long")
red = patches.Rectangle([0, 0], 1, 1, color="r", label="Short")
leg = ax.legend(handles=[blue, red], loc="lower left", frameon=True, framealpha=0.5)
leg.get_frame().set_edgecolor("black")
ax.grid(False)
return ax
def show_profit_attribution(round_trips):
"""
Prints the share of total PnL contributed by each
traded name.
Parameters
----------
round_trips : pd.DataFrame
DataFrame with one row per round trip trade.
- See full explanation in round_trips.extract_round_trips
"""
total_pnl = round_trips["pnl"].sum()
pnl_attribution = round_trips.groupby("symbol")["pnl"].sum() / total_pnl
pnl_attribution.name = ""
pnl_attribution.index = pnl_attribution.index.map(utils.format_asset)
utils.print_table(
pnl_attribution.sort_values(
inplace=False,
ascending=False,
),
name="Profitability (PnL / PnL total) per name",
float_format="{:.2%}".format,
)
def plot_prob_profit_trade(round_trips, ax=None):
"""
Plots a probability distribution for the event of making
a profitable trade.
Parameters
----------
round_trips : pd.DataFrame
DataFrame with one row per round trip trade.
- See full explanation in round_trips.extract_round_trips
ax : matplotlib.Axes, optional
Axes upon which to plot.
Returns
-------
ax : matplotlib.Axes
The axes that were plotted on.
"""
x = np.linspace(0, 1.0, 500)
round_trips["profitable"] = round_trips.pnl > 0
dist = sp.stats.beta(round_trips.profitable.sum(), (~round_trips.profitable).sum())
y = dist.pdf(x)
lower_perc = dist.ppf(0.025)
upper_perc = dist.ppf(0.975)
lower_plot = dist.ppf(0.001)
upper_plot = dist.ppf(0.999)
if ax is None:
ax = plt.subplot()
ax.plot(x, y)
ax.axvline(lower_perc, color="0.5")
ax.axvline(upper_perc, color="0.5")
ax.set_xlabel("Probability of making a profitable decision")
ax.set_ylabel("Belief")
ax.set_xlim(lower_plot, upper_plot)
ax.set_ylim((0, y.max() + 1.0))
return ax
def plot_cones(
name,
bounds,
oos_returns,
num_samples=1000,
ax=None,
cone_std=(1.0, 1.5, 2.0),
random_seed=None,
num_strikes=3,
):
"""
Plots the upper and lower bounds of an n standard deviation
cone of forecasted cumulative returns. Redraws a new cone when
cumulative returns fall outside of last cone drawn.
Parameters
----------
name : str
Account name to be used as figure title.
bounds : pandas.core.frame.DataFrame
Contains upper and lower cone boundaries. Column names are
strings corresponding to the number of standard deviations
above (positive) or below (negative) the projected mean
cumulative returns.
oos_returns : pandas.core.frame.DataFrame
Non-cumulative out-of-sample returns.
num_samples : int
Number of samples to draw from the in-sample daily returns.
Each sample will be an array with length num_days.
A higher number of samples will generate a more accurate
bootstrap cone.
ax : matplotlib.Axes, optional
Axes upon which to plot.
cone_std : list of int/float
Number of standard deviations to use in the boundaries of
the cone. If multiple values are passed, cone bounds will
be generated for each value.
random_seed : int
Seed for the pseudorandom number generator used by the pandas
sample method.
num_strikes : int
Upper limit for number of cones drawn. Can be anything from 0 to 3.
Returns
-------
Returns are either an ax or fig option, but not both. If a
matplotlib.Axes instance is passed in as ax, then it will be modified
and returned. This allows for users to plot interactively in jupyter
notebook. When no ax object is passed in, a matplotlib.figure instance
is generated and returned. This figure can then be used to save
the plot as an image without viewing it.
ax : matplotlib.Axes
The axes that were plotted on.
fig : matplotlib.figure
The figure instance which contains all the plot elements.
"""
if ax is None:
fig = figure.Figure(figsize=(10, 8))
FigureCanvasAgg(fig)
axes = fig.add_subplot(111)
else:
axes = ax
returns = ep.cum_returns(oos_returns, starting_value=1.0)
bounds_tmp = bounds.copy()
returns_tmp = returns.copy()
cone_start = returns.index[0]
colors = ["green", "orange", "orangered", "darkred"]
for c in range(num_strikes + 1):
if c > 0:
tmp = returns.loc[cone_start:]
bounds_tmp = bounds_tmp.iloc[0 : len(tmp)]
bounds_tmp = bounds_tmp.set_index(tmp.index)
crossing = tmp < bounds_tmp[float(-2.0)].iloc[: len(tmp)]
if crossing.sum() <= 0:
break
cone_start = crossing.loc[crossing].index[0]
returns_tmp = returns.loc[cone_start:]
bounds_tmp = bounds - (1 - returns.loc[cone_start])
for std in cone_std:
x = returns_tmp.index
y1 = bounds_tmp[float(std)].iloc[: len(returns_tmp)]
y2 = bounds_tmp[float(-std)].iloc[: len(returns_tmp)]
axes.fill_between(x, y1, y2, color=colors[c], alpha=0.5)
# Plot returns line graph
label = "Cumulative returns = {:.2f}%".format((returns.iloc[-1] - 1) * 100)
axes.plot(returns.index, returns.values, color="black", lw=2.0, label=label)
if name is not None:
axes.set_title(name)
axes.axhline(1, color="black", alpha=0.2)
axes.legend(frameon=True, framealpha=0.5)
if ax is None:
return fig
else:
return axes
utils.py
#
# Copyright 2018 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from itertools import cycle
import empyrical.utils
import numpy as np
import pandas as pd
from IPython.display import display, HTML
import matplotlib
from matplotlib.pyplot import cm
from packaging.version import Version
import os
import datetime
from . import pos
from . import txn
APPROX_BDAYS_PER_MONTH = 21
APPROX_BDAYS_PER_YEAR = 252
MONTHS_PER_YEAR = 12
WEEKS_PER_YEAR = 52
MM_DISPLAY_UNIT = 1000000.0
DAILY = "daily"
WEEKLY = "weekly"
MONTHLY = "monthly"
YEARLY = "yearly"
ANNUALIZATION_FACTORS = {
DAILY: APPROX_BDAYS_PER_YEAR,
WEEKLY: WEEKS_PER_YEAR,
MONTHLY: MONTHS_PER_YEAR,
}
COLORMAP = "Paired"
COLORS = [
"#e6194b",
"#3cb44b",
"#ffe119",
"#0082c8",
"#f58231",
"#911eb4",
"#46f0f0",
"#f032e6",
"#d2f53c",
"#fabebe",
"#008080",
"#e6beff",
"#aa6e28",
"#800000",
"#aaffc3",
"#808000",
"#ffd8b1",
"#000080",
"#808080",
]
pandas_version = Version(pd.__version__)
pandas_one_point_one_or_less = pandas_version < Version("1.2")
def one_dec_places(x, pos):
"""
Adds 1/10th decimal to plot ticks.
"""
return "%.1f" % x
def two_dec_places(x, pos):
"""
Adds 1/100th decimal to plot ticks.
"""
return "%.2f" % x
def percentage(x, pos):
"""
Adds percentage sign to plot ticks.
"""
return "%.0f%%" % x
def format_asset(asset):
"""
If zipline asset objects are used, we want to print them out prettily
within the tear sheet. This function should only be applied directly
before displaying.
"""
try:
import zipline.assets
except ImportError:
return asset
if isinstance(asset, zipline.assets.Asset):
return asset.symbol
else:
return asset
def vectorize(func):
"""
Decorator so that functions can be written to work on Series but
may still be called with DataFrames.
"""
def wrapper(df, *args, **kwargs):
if df.ndim == 1:
return func(df, *args, **kwargs)
elif df.ndim == 2:
return df.apply(func, *args, **kwargs)
return wrapper
def extract_rets_pos_txn_from_zipline(backtest):
"""
Extract returns, positions, transactions and leverage from the
backtest data structure returned by zipline.TradingAlgorithm.run().
The returned data structures are in a format compatible with the
rest of pyfolio and can be directly passed to
e.g. tears.create_full_tear_sheet().
Parameters
----------
backtest : pd.DataFrame
DataFrame returned by zipline.TradingAlgorithm.run()
Returns
-------
returns : pd.Series
Daily returns of strategy.
- See full explanation in tears.create_full_tear_sheet.
positions : pd.DataFrame
Daily net position values.
- See full explanation in tears.create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in tears.create_full_tear_sheet.
Example (on the Quantopian research platform)
---------------------------------------------
>>> backtest = my_algo.run()
>>> returns, positions, transactions =
>>> pyfolio.utils.extract_rets_pos_txn_from_zipline(backtest)
>>> pyfolio.tears.create_full_tear_sheet(returns,
>>> positions, transactions)
"""
backtest.index = backtest.index.normalize()
if backtest.index.tzinfo is None:
backtest.index = backtest.index.tz_localize("UTC")
returns = backtest.returns
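# Flatten the per-day position records from zipline into one date-indexed DataFrame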
raw_positions = []
for dt, pos_row in backtest.positions.items():
df = pd.DataFrame(pos_row)
df.index = [dt] * len(df)
raw_positions.append(df)
if not raw_positions:
raise ValueError("The backtest does not have any positions.")
positions = pd.concat(raw_positions)
positions = pos.extract_pos(positions, backtest.ending_cash)
transactions = txn.make_transaction_frame(backtest.transactions)
if transactions.index.tzinfo is None:
transactions.index = transactions.index.tz_localize("utc")
return returns, positions, transactions
def print_table(table, name=None, float_format=None, formatters=None,
header_rows=None, save_to_file=True,
output_dir='/home/shared/algos/ml4t/plots/temp'):
"""
Pretty print a pandas DataFrame and optionally save it as an HTML file with a unique timestamped filename.
Parameters
----------
table : pandas.Series or pandas.DataFrame
Table to pretty-print.
name : str, optional
Table name to display in upper left corner.
float_format : function, optional
Formatter to use for displaying table elements, passed as the
`float_format` arg to pd.DataFrame.to_html.
formatters : list or dict, optional
Formatters to use by column, passed as the `formatters` arg to
pd.DataFrame.to_html.
header_rows : dict, optional
Extra rows to display at the top of the table.
save_to_file : bool, optional
If True, save the table to an HTML file.
output_dir : str, optional
Directory where the HTML file will be saved.
"""
if isinstance(table, pd.Series):
table = pd.DataFrame(table)
if name is not None:
table.columns.name = name
html = table.to_html(float_format=float_format, formatters=formatters)
if header_rows is not None:
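# Count the <th> cells in the header so the extra rows can span the full table width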
n_cols = html.split("<thead>")[1].split("</thead>")[0].count("<th>")
rows = ""
for name, value in header_rows.items():
rows += '\n <tr style="text-align: right;"><th>%s</th>' % name
rows += "<td colspan=%d>%s</td></tr>" % (n_cols, value)
html = html.replace("<thead>", "<thead>" + rows)
if save_to_file:
# Generate a timestamped filename
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f")
filename = f"table_{timestamp}.html"
file_path = os.path.join(output_dir, filename)
# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
os.makedirs(output_dir)
with open(file_path, 'w') as f:
f.write(html)
print(f"Table saved to {file_path}")
else:
display(HTML(html))
def standardize_data(x):
"""
Standardize an array with mean and standard deviation.
Parameters
----------
x : np.array
Array to standardize.
Returns
-------
np.array
Standardized array.
"""
return (x - np.mean(x)) / np.std(x)
def detect_intraday(positions, transactions, threshold=0.25):
"""
Attempt to detect an intraday strategy. Get the number of
positions held at the end of the day, and divide that by the
number of unique stocks transacted every day. If the average quotient
is below a threshold, then an intraday strategy is detected.
Parameters
----------
positions : pd.DataFrame
Daily net position values.
- See full explanation in create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in create_full_tear_sheet.
Returns
-------
boolean
True if an intraday strategy is detected.
"""
daily_txn = transactions.copy()
daily_txn.index = daily_txn.index.date
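# Compare names still held at the end of each day to unique names traded that day;
# a low ratio implies positions are opened and closed intraday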
txn_count = daily_txn.groupby(level=0).symbol.nunique().sum()
daily_pos = positions.drop("cash", axis=1).replace(0, np.nan)
return daily_pos.count(axis=1).sum() / txn_count < threshold
def check_intraday(estimate, returns, positions, transactions):
"""
Logic for checking if a strategy is intraday and processing it.
Parameters
----------
estimate: boolean or str, optional
Approximate returns for intraday strategies.
See description in tears.create_full_tear_sheet.
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in create_full_tear_sheet.
positions : pd.DataFrame
Daily net position values.
- See full explanation in create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in create_full_tear_sheet.
Returns
-------
pd.DataFrame
Daily net position values, adjusted for intraday movement.
"""
if estimate == "infer":
if positions is not None and transactions is not None:
if detect_intraday(positions, transactions):
warnings.warn(
"Detected intraday strategy; inferring positions from "
"transactions. Set estimate_intraday=False to disable."
)
return estimate_intraday(returns, positions, transactions)
else:
return positions
else:
return positions
elif estimate:
if positions is not None and transactions is not None:
return estimate_intraday(returns, positions, transactions)
else:
raise ValueError("Positions and txns needed to estimate intraday")
else:
return positions
def estimate_intraday(returns, positions, transactions, EOD_hour=23):
"""
Intraday strategies will often not hold positions at the day end.
This attempts to find the point in the day that best represents
the activity of the strategy on that day, and effectively resamples
the end-of-day positions with the positions at this point of day.
The point of day is found by detecting when our exposure in the
market is at its maximum point. Note that this is an estimate.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in create_full_tear_sheet.
positions : pd.DataFrame
Daily net position values.
- See full explanation in create_full_tear_sheet.
transactions : pd.DataFrame
Prices and amounts of executed trades. One row per trade.
- See full explanation in create_full_tear_sheet.
Returns
-------
pd.DataFrame
Daily net position values, resampled for intraday behavior.
"""
# Construct DataFrame of transaction amounts
txn_val = transactions.copy()
txn_val.index.names = ["date"]
txn_val["value"] = txn_val.amount * txn_val.price
txn_val = (
txn_val.reset_index()
.pivot_table(index="date", values="value", columns="symbol")
.replace(np.nan, 0)
)
# Cumulate transaction amounts each day
txn_val = txn_val.groupby(txn_val.index.date).cumsum()
# Calculate exposure, then take peak of exposure every day
txn_val["exposure"] = txn_val.abs().sum(axis=1)
condition = txn_val["exposure"] == txn_val.groupby(pd.Grouper(freq="24H"))[
"exposure"
].transform(max)
txn_val = txn_val[condition].drop("exposure", axis=1)
# Compute cash delta
txn_val["cash"] = -txn_val.sum(axis=1)
# Shift EOD positions to positions at start of next trading day
positions_shifted = positions.copy().shift(1).fillna(0)
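# Back out starting capital from the first day's positions and return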
starting_capital = positions.iloc[0].sum() / (1 + returns.iloc[0])
positions_shifted.loc[positions_shifted.index[0], "cash"] = starting_capital
# Format and add start positions to intraday position changes
txn_val.index = txn_val.index.normalize()
corrected_positions = positions_shifted.add(txn_val, fill_value=0)
corrected_positions.index.name = "period_close"
corrected_positions.columns.name = "sid"
return corrected_positions
def clip_returns_to_benchmark(rets, benchmark_rets):
"""
Drop entries from rets so that the start and end dates of rets match those
of benchmark_rets.
Parameters
----------
rets : pd.Series
Daily returns of the strategy, noncumulative.
- See pf.tears.create_full_tear_sheet for more details
benchmark_rets : pd.Series
Daily returns of the benchmark, noncumulative.
Returns
-------
clipped_rets : pd.Series
Daily noncumulative returns with index clipped to match that of
benchmark returns.
"""
if (rets.index[0] < benchmark_rets.index[0]) or (
rets.index[-1] > benchmark_rets.index[-1]
):
clipped_rets = rets[benchmark_rets.index]
else:
clipped_rets = rets
return clipped_rets
def to_utc(df):
"""
For use in tests; applies a UTC timezone to the DataFrame's index.
"""
try:
df.index = df.index.tz_localize("UTC")
except TypeError:
df.index = df.index.tz_convert("UTC")
return df
def to_series(df):
"""
For use in tests; converts DataFrame's first column to Series.
"""
return df[df.columns[0]]
# This function is simply a passthrough to empyrical, but is
# required by register_return_func and get_symbol_rets.
default_returns_func = empyrical.utils.default_returns_func
# Settings dict to store functions/values that may
# need to be overridden depending on the users environment
SETTINGS = {"returns_func": default_returns_func}
def register_return_func(func):
"""
Registers the 'returns_func' that will be called for
retrieving returns data.
Parameters
----------
func : function
A function that returns a pandas Series of asset returns.
The signature of the function must be as follows
>>> func(symbol)
Where symbol is an asset identifier
Returns
-------
None
"""
SETTINGS["returns_func"] = func
def get_symbol_rets(symbol, start=None, end=None):
"""
Calls the currently registered 'returns_func'
Parameters
----------
symbol : object
An identifier for the asset whose return
series is desired.
e.g. ticker symbol or database ID
start : date, optional
Earliest date to fetch data for.
Defaults to earliest date available.
end : date, optional
Latest date to fetch data for.
Defaults to latest date available.
Returns
-------
pandas.Series
Returned by the current 'returns_func'
"""
return SETTINGS["returns_func"](symbol, start=start, end=end)
def configure_legend(
ax, autofmt_xdate=True, change_colors=False, rotation=30, ha="right"
):
"""
Format legend for perf attribution plots:
- put legend to the right of plot instead of overlapping with it
- make legend order match up with graph lines
- set colors according to colormap
"""
chartBox = ax.get_position()
ax.set_position([chartBox.x0, chartBox.y0, chartBox.width * 0.75, chartBox.height])
# make legend order match graph lines
handles, labels = ax.get_legend_handles_labels()
handles_and_labels_sorted = sorted(
zip(handles, labels), key=lambda x: x[0].get_ydata()[-1], reverse=True
)
handles_sorted = [h[0] for h in handles_and_labels_sorted]
labels_sorted = [h[1] for h in handles_and_labels_sorted]
if change_colors:
for handle, color in zip(handles_sorted, cycle(COLORS)):
handle.set_color(color)
ax.legend(
handles=handles_sorted,
labels=labels_sorted,
frameon=True,
framealpha=0.5,
loc="upper left",
bbox_to_anchor=(1.05, 1),
fontsize="small",
)
# manually rotate xticklabels instead of using matplotlib's autofmt_xdate
# because it disables xticklabels for all but the last plot
if autofmt_xdate:
for label in ax.get_xticklabels():
label.set_ha(ha)
label.set_rotation(rotation)
def sample_colormap(cmap_name, n_samples):
"""
Sample a colormap from matplotlib
"""
colors = []
# cm.cmap_d was removed in newer matplotlib releases; use the colormap registry instead
colormap = matplotlib.colormaps[cmap_name]
for i in np.linspace(0, 1, n_samples):
colors.append(colormap(i))
return colors