<p>此代码适用于所有数据集（e1、e2、e3、e4、e5、e6 的 .dat 文件）。</p>
<pre class="lang-py prettyprint-override"><code>import urllib
import io
import pandas as pd
# Download locations of the six JMulTi example datasets (e1.dat .. e6.dat).
(URL1, URL2, URL3, URL4, URL5, URL6) = (
    "http://www.jmulti.de/download/datasets/e1.dat",
    "http://www.jmulti.de/download/datasets/e2.dat",
    "http://www.jmulti.de/download/datasets/e3.dat",
    "http://www.jmulti.de/download/datasets/e4.dat",
    "http://www.jmulti.de/download/datasets/e5.dat",
    "http://www.jmulti.de/download/datasets/e6.dat",
)
def read_data(url: str) -> pd.DataFrame:
    """Download a JMulTi ``.dat`` dataset and return it as a DataFrame.

    Everything before the period header is skipped. The header is a line
    of the form ``<YYYY Qn>`` (quarterly series starting in quarter n) or
    ``<YYYY Mn>`` (monthly series starting in month n). The line after
    the header holds the whitespace-separated column names, and the
    remaining lines are parsed as fixed-width data rows.

    Args:
        url: Location of the ``.dat`` file; any URL accepted by
            ``urllib.request.urlopen``.

    Returns:
        The data columns named after the file's column line, plus
        ``month``, ``quarter`` and ``year`` columns giving the start of
        each observation's period.

    Raises:
        ValueError: If the file contains no ``<YYYY Qn>`` / ``<YYYY Mn>``
            header line.
    """
    # Function-scope imports: the snippet's top-level imports only bring in
    # ``urllib`` (which does not guarantee the ``urllib.request`` submodule
    # is loaded) and never import ``re``.
    import re
    import urllib.request

    # Calendar months covered by one period of each supported frequency.
    months_per_period = {"Q": 3, "M": 1}

    # Close the connection as soon as the payload has been read.
    with urllib.request.urlopen(url) as response:
        stream = io.StringIO(response.read().decode("iso8859"))

    # Skip the free-form preamble. ``readline`` returns "" at EOF, which
    # ends the loop instead of spinning forever on a header-less file.
    for line in iter(stream.readline, ""):
        header = re.match(r"<(\d{4}) ([QM])(\d+)>", line)
        if header:
            break
    else:
        raise ValueError(
            f"no '<YYYY Qn>' or '<YYYY Mn>' header line found in {url!r}"
        )

    year = int(header.group(1))
    freq = header.group(2)
    first_period = int(header.group(3))

    cols = stream.readline().split()
    df = pd.read_fwf(stream, header=None, names=cols)

    # Build period-start dates from January of the header year, then shift
    # them so the first row lands on the period named in the header.
    shift = pd.DateOffset(months=(first_period - 1) * months_per_period[freq])
    dti = pd.date_range(f"{year}-1-1", periods=len(df), freq=freq + "S") + shift

    df["month"] = dti.month
    df["quarter"] = dti.quarter
    df["year"] = dti.year
    return df
</code></pre>
<pre class="lang-py prettyprint-override"><code>>>> read_data(URL6)
Dp R month quarter year
0 -0.003133 0.083 4 2 1972
1 0.018871 0.083 7 3 1972
2 0.024804 0.087 10 4 1972
3 0.016278 0.087 1 1 1973
4 0.000290 0.102 4 2 1973
.. ... ... ... ... ...
102 0.024245 0.051 10 4 1997
103 -0.014647 0.047 1 1 1998
104 -0.002049 0.047 4 2 1998
105 0.002475 0.041 7 3 1998
106 0.023923 0.038 10 4 1998
[107 rows x 5 columns]
>>> read_data(URL5)
i_short i_long month quarter year
0 4.36 6.2 1 1 1960
1 4.47 6.2 2 1 1960
2 4.71 6.2 3 1 1960
3 4.59 6.2 4 2 1960
4 4.64 6.2 5 2 1960
.. ... ... ... ... ...
331 3.95 6.0 8 3 1987
332 3.99 6.2 9 3 1987
333 4.70 6.5 10 4 1987
334 3.94 6.0 11 4 1987
335 3.65 5.8 12 4 1987
[336 rows x 5 columns]
</code></pre>