The usual bootstrapping method doesn't preserve the ordering of time series data, and it is, therefore, unsuitable for trend estimation. In the block bootstrapping approach, we split data into non-overlapping blocks of equal size and use those blocks to generate new samples. In this recipe, we will apply a very naive and easy-to-implement linear model with annual temperature data. The procedure for this recipe is as follows:
import dautil as dl import random import matplotlib.pyplot as plt import pandas as pd import numpy as np import seaborn as sns import ch6util from IPython.display import HTML
def shuffle(temp, blocks):
    """Build one block-bootstrap sample from ``blocks``.

    ``blocks`` is shuffled in place (the caller's list is reordered), the
    shuffled blocks are flattened into a single ``TEMP`` column indexed by
    ``temp.index``, and the frame is resampled with the ``'A'`` rule.

    NOTE(review): returning ``resample('A')`` directly presumably relies on
    a pandas version in which ``resample`` aggregates immediately; confirm
    against the pandas version this notebook targets.
    """
    random.shuffle(blocks)
    flat_values = dl.collect.flatten(blocks)
    frame = pd.DataFrame({'TEMP': flat_values}, index=temp.index)
    return frame.resample('A')
# Load the TEMP column, resample it with the 'M' rule and drop missing values.
temp = (
    dl.data.Weather.load()['TEMP']
    .resample('M')
    .dropna()
)

# Cut the raw values into chunks of 100 for the block bootstrap.
blocks = list(dl.collect.chunk(temp.values, 100))

# Fixed seed so the shuffles are reproducible.
random.seed(12033)
# NOTE(review): `context` is not defined in the visible code; presumably it
# is created earlier in the notebook -- confirm.
sp = dl.plotting.Subplotter(2, 2, context)
cp = dl.plotting.CyclePlotter(sp.ax)
medians = []
slopes = []

for i in range(240):
    # One bootstrap sample per iteration: reshuffle the blocks, then record
    # the fitted value and the median difference reported by ch6util.
    df = shuffle(temp, blocks)
    slopes.append(ch6util.fit(df))
    medians.append(ch6util.diff_median(df))

    # Only the first five samples are drawn.
    if i >= 5:
        continue

    cp.plot(df.index, df.values)

sp.label(ylabel_params=dl.data.Weather.get_header('TEMP'))
# Normalized distribution plot of the bootstrap medians on the next subplot.
sns.distplot(
    medians,
    ax=sp.next_ax(),
    norm_hist=True,
)
sp.label()
# Normalized distribution plot of the bootstrap slopes on the next subplot.
sns.distplot(
    slopes,
    ax=sp.next_ax(),
    norm_hist=True,
)
sp.label()
# Track how the two bounds returned by dl.stats.outliers() change as more
# bootstrap medians are included: prefixes of length 30 .. len(medians) - 1.
mins = []
tops = []
xrng = range(30, len(medians))

for i in xrng:
    # Unpack into low/high rather than min/max so the Python builtins are
    # not shadowed at module level for the rest of the notebook.
    low, high = dl.stats.outliers(medians[:i])
    mins.append(low)
    tops.append(high)

# Plot both bounds against the number of samples used on the last subplot.
cp = dl.plotting.CyclePlotter(sp.next_ax())
cp.plot(xrng, mins, label='5 %')
cp.plot(xrng, tops, label='95 %')
sp.label()

# Last expression of the cell: wrap the result of sp.exit() as HTML output.
HTML(sp.exit())
Refer to the following screenshot for the end result:
The preceding code comes from the block_boot.ipynb file in this book's code bundle.