Benchmark Figures on Selected Tasks¶
Setup¶
[1]:
import ast
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
[2]:
# Global figure styling shared by every plot below: bright palette with
# matplotlib color shorthands remapped to it, tick-style axes, and the
# "talk" context with fonts enlarged by a further factor of 1.5
sns.set_theme(
    context="talk",
    style="ticks",
    palette="bright",
    font_scale=1.5,
    color_codes=True,
)
[3]:
def load_result(filename):
    """
    Loads results from specified file

    Every non-blank line of the file is parsed as one Python literal
    with ``ast.literal_eval``.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    ls : list
        The parsed literals, one entry per non-blank line, in file order.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError, SyntaxError
        If a non-blank line is not a valid Python literal.
    """
    # The context manager closes the handle even if parsing fails part-way
    with open(filename, "r") as inputs:
        # Blank lines (e.g. a trailing newline at the end of the file) carry
        # no record and would make ``literal_eval`` raise, so skip them
        return [ast.literal_eval(line) for line in inputs if line.strip()]
def plot_acc(col, ls, pos, n_train, lw=5):
    """
    Plots, for every classifier, the mean curve and the 25%-75% quantile band

    Parameters
    ----------
    col : matplotlib axes
        Axes to draw on.
    ls : nested sequence
        Indexed as ``ls[pos][i]``; each entry is reduced along axis 0
        (presumably repeated runs x training sizes -- confirm against the
        result files).
    pos : int
        Which entry of ``ls`` to draw. Only ``pos == 0`` attaches legend
        labels, presumably so a figure-level legend lists each classifier
        once instead of once per panel.
    n_train : sequence
        x coordinates of the curves.
    lw : float, default=5
        Line width of the mean curve.

    Notes
    -----
    Reads the module-level ``legends``, ``colors`` and ``styles`` lists,
    indexed by the position of the classifier inside ``ls[pos]``.
    """
    for i, runs in enumerate(ls[pos]):
        line_kwargs = dict(lw=lw, color=colors[i], linestyle=styles[i])
        if pos == 0:
            line_kwargs["label"] = legends[i]

        # NOTE(review): the mean is not NaN-aware while the quantiles below
        # are, so a NaN in ``runs`` blanks the mean curve but not the band
        col.plot(n_train, np.mean(runs, axis=0), **line_kwargs)

        quartiles = np.nanquantile(runs, [0.25, 0.75], axis=0)
        col.fill_between(
            n_train,
            quartiles[0],
            quartiles[1],
            lw=1,
            facecolor=colors[i],
            linestyle=styles[i],
            alpha=0.3,
        )
[4]:
# Root folder of the benchmark outputs; result files are looked up as
# <directory><classifier prefix><dataset>_<suffix>.txt
directory = "../benchmarks/results/"

# Each dataset paired with its number of plotted training sizes
# (the x axis runs from 100 to ranges[i] * 100 in steps of 100)
datasets, ranges = (
    list(column)
    for column in zip(
        ("splice", 23),
        ("pendigits", 74),
        ("cifar10", 500),
    )
)
Accuracy Plot for Streaming Classifiers¶
[5]:
# One row per streaming classifier:
# (result sub-directory, legend label, line color, line style)
prefixes, legends, colors, styles = (
    list(column)
    for column in zip(
        ("sdf/", "SDF", "r", "-"),
        ("sdt/", "SDT", "r", "--"),
        ("ht/", "HT", "g", "--"),
        ("mf/", "MF", "y", "-"),
    )
)
[6]:
# Collect the accuracy records as acc_ls[dataset][classifier], keeping at
# most the first 10 lines of every result file
acc_ls = []
for dataset in datasets:
    acc_ls.append(
        [
            load_result(directory + prefix + dataset + "_acc.txt")[:10]
            for prefix in prefixes
        ]
    )
[7]:
# One panel per dataset, sharing a single x-axis caption and figure legend
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(17, 6), constrained_layout=True)
fig.text(0.53, -0.07, "Number of Train Samples", ha="center")

xtitles = ["Splice", "Pendigits", "CIFAR-10"]
ytitles = ["Accuracy"]
ylimits = [[0, 1]]
yticks = [[0, 0.5, 1]]

for i, col in enumerate(ax):
    col.set_xscale("log")
    col.set_ylim(ylimits[0])

    # Training-set sizes: 100, 200, ..., ranges[i] * 100
    n_train = range(100, (ranges[i] + 1) * 100, 100)

    # Label x axis and plot figures
    col.set_title(xtitles[i])
    plot_acc(col, acc_ls, i, n_train)

    # Label y axis: only the left-most panel carries ticks and the label.
    # There is a single y title, so index it like ylimits/yticks rather
    # than by panel number.
    if i % 3 == 0:
        col.set_yticks(yticks[0])
        col.set_ylabel(ytitles[0])
    else:
        col.set_yticks([])

fig.align_ylabels(ax)

leg = fig.legend(
    bbox_to_anchor=(0.53, -0.22),
    bbox_transform=fig.transFigure,
    ncol=6,
    loc="lower center",
)
leg.get_frame().set_linewidth(0.0)

# Matplotlib 3.7 renamed Legend.legendHandles to Legend.legend_handles and
# the old name was removed in later releases; support both spellings
handles = getattr(leg, "legend_handles", None)
if handles is None:
    handles = leg.legendHandles
for legobj in handles:
    legobj.set_linewidth(5.0)

fig.savefig("../paper/select_acc_stream.pdf", transparent=True, bbox_inches="tight")

Accuracy Plot for Batch Classifiers¶
[8]:
# One row per classifier in the batch comparison:
# (result sub-directory, legend label, line color, line style)
prefixes, legends, colors, styles = (
    list(column)
    for column in zip(
        ("sdf/", "SDF", "r", "-"),
        ("rf/", "DF", "b", "-"),
        ("sdt/", "SDT", "r", "--"),
        ("dt/", "DT", "b", "--"),
    )
)
[9]:
# Rebuild acc_ls[dataset][classifier] for the classifiers currently listed
# in `prefixes`, keeping at most the first 10 lines of every result file
acc_ls = []
for dataset in datasets:
    per_classifier = [
        load_result(directory + prefix + dataset + "_acc.txt")[:10]
        for prefix in prefixes
    ]
    acc_ls.append(per_classifier)
[10]:
# One panel per dataset, sharing a single x-axis caption and figure legend
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(17, 6), constrained_layout=True)
fig.text(0.53, -0.07, "Number of Train Samples", ha="center")

xtitles = ["Splice", "Pendigits", "CIFAR-10"]
ytitles = ["Accuracy"]
ylimits = [[0, 1]]
yticks = [[0, 0.5, 1]]

for i, col in enumerate(ax):
    col.set_xscale("log")
    col.set_ylim(ylimits[0])

    # Training-set sizes: 100, 200, ..., ranges[i] * 100
    n_train = range(100, (ranges[i] + 1) * 100, 100)

    # Label x axis and plot figures
    col.set_title(xtitles[i])
    plot_acc(col, acc_ls, i, n_train)

    # Label y axis: only the left-most panel carries ticks and the label.
    # There is a single y title, so index it like ylimits/yticks rather
    # than by panel number.
    if i % 3 == 0:
        col.set_yticks(yticks[0])
        col.set_ylabel(ytitles[0])
    else:
        col.set_yticks([])

fig.align_ylabels(ax)

leg = fig.legend(
    bbox_to_anchor=(0.53, -0.22),
    bbox_transform=fig.transFigure,
    ncol=6,
    loc="lower center",
)
leg.get_frame().set_linewidth(0.0)

# Matplotlib 3.7 renamed Legend.legendHandles to Legend.legend_handles and
# the old name was removed in later releases; support both spellings
handles = getattr(leg, "legend_handles", None)
if handles is None:
    handles = leg.legendHandles
for legobj in handles:
    legobj.set_linewidth(5.0)

fig.savefig("../paper/select_acc_batch.pdf", transparent=True, bbox_inches="tight")

Time Plot¶
[11]:
# One row per classifier in the timing comparison:
# (result sub-directory, legend label, line color, line style)
prefixes, legends, colors, styles = (
    list(column)
    for column in zip(
        ("sdf/", "SDF", "r", "-"),
        ("rf/", "DF", "b", "-"),
        ("sdt/", "SDT", "r", "--"),
        ("dt/", "DT", "b", "--"),
        ("ht/", "HT", "g", "--"),
        ("mf/", "MF", "y", "-"),
    )
)
[12]:
# Show cumulative (running-total) training time for the batch estimators
concat = True

# time_ls[dataset][classifier] holds at most the first 10 lines of each
# "_train_t.txt" result file
time_ls = []
for dataset, n_steps in zip(datasets, ranges):
    time_l = []
    for prefix in prefixes:
        runs = load_result(directory + prefix + dataset + "_train_t.txt")[:10]
        if concat and prefix in ("dt/", "rf/"):
            # Replace the first n_steps entries of every record, in place,
            # by their running sum
            for run in runs:
                for step in range(1, n_steps):
                    run[step] += run[step - 1]
        time_l.append(runs)
    time_ls.append(time_l)
[13]:
# One log-log panel per dataset, sharing a single x-axis caption and legend
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(17, 6), constrained_layout=True)
fig.text(0.53, -0.07, "Number of Train Samples", ha="center")

xtitles = ["Splice", "Pendigits", "CIFAR-10"]
ytitles = ["Wall Time (s)"]
ylimits = [[1e-4, 1e5]]
yticks = [[1e-4, 1e-1, 1e2, 1e5]]

for i, col in enumerate(ax):
    col.set_xscale("log")
    col.set_yscale("log")
    col.set_ylim(ylimits[0])

    # Training-set sizes: 100, 200, ..., ranges[i] * 100
    n_train = range(100, (ranges[i] + 1) * 100, 100)

    # Label x axis and plot figures
    col.set_title(xtitles[i])
    plot_acc(col, time_ls, i, n_train)

    # Label y axis: only the left-most panel carries ticks and the label.
    # There is a single y title, so index it like ylimits/yticks rather
    # than by panel number.
    if i % 3 == 0:
        col.set_yticks(yticks[0])
        col.set_ylabel(ytitles[0])
    else:
        col.set_yticks([])

fig.align_ylabels(ax)

leg = fig.legend(
    bbox_to_anchor=(0.53, -0.22),
    bbox_transform=fig.transFigure,
    ncol=6,
    loc="lower center",
)
leg.get_frame().set_linewidth(0.0)

# Matplotlib 3.7 renamed Legend.legendHandles to Legend.legend_handles and
# the old name was removed in later releases; support both spellings
handles = getattr(leg, "legend_handles", None)
if handles is None:
    handles = leg.legendHandles
for legobj in handles:
    legobj.set_linewidth(5.0)

fig.savefig("../paper/select_time.pdf", transparent=True, bbox_inches="tight")

Memory Plot¶
[14]:
# Reorder plots
prefixes = ["sdf/", "sdt/", "ht/", "mf/"]
legends = ["SDF", "SDT", "HT", "MF"]
colors = ["r", "r", "g", "y"]
styles = ["-", "--", "--", "-"]

# Load CIFAR-10 memory records
# Derive the x axis and the dataset name here instead of relying on the
# `n_train` / `dataset` variables that happen to be left over from loops in
# earlier cells; CIFAR-10 is entry 2 of `datasets` / `ranges`.
n_train = range(100, (ranges[2] + 1) * 100, 100)

# NOTE(review): the 8.0 and 56 / 100 factors are not explained in this
# notebook -- presumably a percentage of a 56 GB machine converted to GB
# (the plot's y axis is labelled in GB); confirm against the benchmark setup
cifar_data_mem = 8.0 * 56 / 100 * np.ones(len(n_train))

cifar_mem_l = []
for prefix in prefixes:
    cifar_mem = (
        # Only the first record of the file is used, so the mean over
        # axis 0 is that record itself
        np.mean(load_result(directory + prefix + datasets[2] + "_v_m.txt")[:1], axis=0)
        * 56
        / 100
        # - cifar_data_mem
    )
    # Wrapped in a list so plot_acc can reduce it along axis 0 like the
    # multi-record results
    cifar_mem_l.append([cifar_mem])
[15]:
# Single-panel figure: memory of the streaming classifiers on CIFAR-10
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 7), constrained_layout=True)

xtitle = "Number of Train Samples"
ytitle = "Max Virtual Memory (GB)"
ylimit = [0, 32]
ytick = [0, 8, 16, 24, 32]

ax.set_xscale("log")
ax.set_ylim(ylimit)

# Training-set sizes: 100, 200, ..., ranges[2] * 100
n_train = range(100, (ranges[2] + 1) * 100, 100)

# Reference line for the dataset itself
ax.plot(
    n_train,
    cifar_data_mem,
    label="dataset",
    color="k",
    linestyle="dotted",
    lw=3,
)

# Classifier curves; position 0 makes plot_acc attach the legend labels
plot_acc(ax, [cifar_mem_l], 0, n_train, lw=3)

# Axis labels and ticks
ax.set_xlabel(xtitle)
ax.set_yticks(ytick)
ax.set_ylabel(ytitle)

# Legend, listed in the reverse of plotting order
handles, labels = ax.get_legend_handles_labels()
ax.legend(list(reversed(handles)), list(reversed(labels)), frameon=False)

plt.savefig(
    "../paper/select_cifar_mem_stream.pdf",
    transparent=True,
    bbox_inches="tight",
)
