First, be sure to be in the main folder, or to have installed SMPyBandits, and import the Evaluator class from the Environment package:
import numpy as np
import matplotlib.pyplot as plt
!pip install SMPyBandits watermark
%load_ext watermark
%watermark -v -m -p SMPyBandits -a "Lilian Besson"
# Local imports
from SMPyBandits.Environment import Evaluator, tqdm
from SMPyBandits.Environment.plotsettings import legend, makemarkers
We also need arms, for instance Bernoulli-distributed arms:
# Import arms
from SMPyBandits.Arms import Bernoulli
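As a quick sanity check (not part of the original notebook; it assumes the usual SMPyBandits Arm interface, with a draw() method and a mean attribute), we can draw samples from one such arm:
# Sanity check (assumes Arm.draw() and Arm.mean, as in SMPyBandits)
arm = Bernoulli(0.3)
samples = [arm.draw() for _ in range(10000)]
print("Empirical mean {:.3f} vs true mean {:.3f}".format(np.mean(samples), arm.mean))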
And finally we need some single-player Reinforcement Learning algorithms:
# Import algorithms
from SMPyBandits.Policies import *
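To fix ideas, here is a minimal sketch, not from the original notebook, of how one of these policies (e.g. UCB) interacts with Bernoulli arms; it assumes the standard SMPyBandits policy interface with startGame(), choice() and getReward():
# Minimal bandit loop (assumes the startGame/choice/getReward interface of SMPyBandits policies)
nbArms = 9
means = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
arms = [Bernoulli(mu) for mu in means]
policy = UCB(nbArms)
policy.startGame()
for t in range(1000):
    arm = policy.choice()          # index of the arm chosen at time t
    reward = arms[arm].draw()      # binary reward from that Bernoulli arm
    policy.getReward(arm, reward)  # update the policy's statistics
print("Number of pulls of each arm:", policy.pulls)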
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (12.4, 7)
Parameters for the simulation: HORIZON = 20000 is the time horizon, REPETITIONS = 40 is the number of repetitions, and N_JOBS = 4 is the number of CPU cores used to parallelize the code.
HORIZON = 20000
REPETITIONS = 40
N_JOBS = 4
We consider in this example one problem, with Bernoulli arms of different means.
ENVIRONMENTS = [  # 1) Bernoulli arms
    {   # A very easy problem, but it is used in a lot of articles
        "arm_type": Bernoulli,
        "params": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    }
]
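For reference, the Lai & Robbins complexity constant $C_K$ that appears later in the lower-bound plots can be recomputed by hand for this problem, as $C_K = \sum_{k : \mu_k < \mu^*} (\mu^* - \mu_k) / \mathrm{kl}(\mu_k, \mu^*)$. This is a hypothetical check, not part of the original notebook, and it assumes the klBern helper from SMPyBandits.Policies.kullback:
# Hypothetical check of the Lai & Robbins constant (assumes klBern is available in SMPyBandits.Policies.kullback)
from SMPyBandits.Policies.kullback import klBern
means = ENVIRONMENTS[0]["params"]
best = max(means)
C_K = sum((best - mu) / klBern(mu, best) for mu in means if mu < best)
print("Lai & Robbins constant C_K = {:.3g}".format(C_K))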
We compare some policies that use the DoublingTrickWrapper policy, with a common growing scheme; the schemes we keep are listed below, with a short illustration of their restart points just after the list.
NEXT_HORIZONS = [
    # next_horizon__arithmetic,
    next_horizon__geometric,
    # next_horizon__exponential,
    # next_horizon__exponential_slow,
    next_horizon__exponential_generic
]
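As a quick illustration (an assumption based on how breakpoints() is called further below: it is expected to return the list of restart points and the final gap), we can print the restart times each scheme produces up to the horizon, starting from a first horizon of 100:
# Illustration of the doubling schemes (assumes breakpoints(next_horizon, first_horizon, horizon) -> (points, gap))
for next_horizon in NEXT_HORIZONS:
    points, gap = breakpoints(next_horizon, 100, HORIZON)
    print("Restart points for {}: {}".format(next_horizon.__name__, points))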
POLICIES = [
    # --- Doubling trick algorithm
    {
        "archtype": DoublingTrickWrapper,
        "params": {
            "next_horizon": next_horizon,
            "full_restart": full_restart,
            "policy": policy,
        }
    }
    for policy in [
        UCBH,
        MOSSH,
        klUCBPlusPlus,
        ApproximatedFHGittins,
    ]
    for full_restart in [
        True,
        # False,
    ]
    for next_horizon in NEXT_HORIZONS
]
Complete configuration for the problem:
configuration = {
    # --- Duration of the experiment
    "horizon": HORIZON,
    # --- Number of repetitions of the experiment (to have an average)
    "repetitions": REPETITIONS,
    # --- Parameters for the use of joblib.Parallel
    "n_jobs": N_JOBS,    # = nb of CPU cores
    "verbosity": 6,      # Max joblib verbosity
    # --- Arms
    "environment": ENVIRONMENTS,
    # --- Algorithms
    "policies": POLICIES,
}
configuration
We create the Evaluator object from this configuration:
evaluation = Evaluator(configuration)
Now we can simulate the environment. That part can take some time.
for envId, env in tqdm(enumerate(evaluation.envs), desc="Problems"):
    # Evaluate just that env
    evaluation.startOneEnv(envId, env)
And finally, visualize the results, with the plotting methods of an Evaluator object:
def plotAll(evaluation, envId):
    evaluation.printFinalRanking(envId)
    fig = evaluation.plotRegrets(envId)
    # evaluation.plotRegrets(envId, semilogx=True)
    # evaluation.plotRegrets(envId, meanRegret=True)
    # evaluation.plotBestArmPulls(envId)
    return fig
fig = plotAll(evaluation, 0)
We can also plot the Lai & Robbins lower bound, taking into account the restart points of the doubling trick:
DEFAULT_FIRST_HORIZON = 100
def lower_bound_with_breakpoints(next_horizon, horizon, env,
                                 first_horizon=DEFAULT_FIRST_HORIZON,
                                 fig=None, marker=None):
    points, gap = breakpoints(next_horizon, first_horizon, horizon)
    X = np.arange(1, horizon)
    Y = np.log(X)
    # Dirty estimate: restart the log(t) lower bound at each breakpoint
    for estimate_horizon in points:
        if estimate_horizon <= horizon:
            before_breakpoint = np.max(np.where(X == estimate_horizon - 1)[0])
            lower_bound_before_breakpoint = Y[before_breakpoint]
            print("At time {}, the lower bound was {}".format(estimate_horizon, lower_bound_before_breakpoint))
            after = np.where(X >= estimate_horizon)
            Y[after] = np.log(X[after] - X[before_breakpoint]) + lower_bound_before_breakpoint
    if fig is None:  # new figure if needed
        fig, ax = plt.subplots()
        ax.set_xlabel("Time steps t=1..T, $T = {}$".format(horizon))
        ax.set_ylabel("Regret lower-bound")
        ax.set_title("Lai & Robbins lower-bound for problem with $K={}$ arms and $C_K={:.3g}$\nAnd doubling trick with restart points ({})".format(env.nbArms, env.lowerbound(), next_horizon.__latex_name__))
    else:
        ax = fig.axes[0]
        # Remove the current legend before re-plotting, cf. https://stackoverflow.com/a/26845924/
        ax_legend = ax.legend()
        ax_legend.remove()
    complexity = env.lowerbound()
    ax.plot(X, complexity * Y,
            'k--' if marker is None else '{}k--'.format(marker),
            markevery=(0.0, 0.1),
            label="LB, DT restart ({})".format(next_horizon.__latex_name__))
    legend(fig=fig)
    fig.show()
    return fig
_ = lower_bound_with_breakpoints(next_horizon__exponential_generic, HORIZON, evaluation.envs[0])
fig = plotAll(evaluation, 0)
markers = makemarkers(len(NEXT_HORIZONS))
for i, next_horizon in enumerate(NEXT_HORIZONS):
    fig = lower_bound_with_breakpoints(next_horizon, HORIZON, evaluation.envs[0], fig=fig, marker=markers[i])
fig
That's it for today, folks!