作者:曹鑫
双十一到今年已经是13个年头,每年大家都在满心期待看着屏幕上的数字跳动,年年打破记录。而2019年的天猫双11的销售额却被一位微博网友提前7个月用数据拟合的方法预测出来了。他的预测值是2675.37或者2689.00亿元,而实际成交额是2684亿元。只差了5亿元,误差率只有千分之一。
但如果你用同样的方法去预测2020年,就会发现:预测值是3282亿,实际却达到了4982亿。原来2020年改了规则,实际统计的是11月1日到11日的销量,理论上已经不能和历史数据合并预测,但咱们就为了图个乐,主要是为了练习一下Python的多项式回归和可视化绘图。
把预测先发出来:今年双十一的销量是9029.688亿元!坐等双十一,各位看官回来打我的脸。
NO.01、统计历年双十一销量数据
从网上搜集来历年淘宝天猫双十一销售额数据,单位为亿元,利用Pandas整理成DataFrame,并添加了一列'年份int',留作后续的计算使用。
import pandas as pd

# Taobao/Tmall Double 11 sales by year, collected from the web.
# Unit: 100 million CNY. For demonstration only.
double11_sales = {
    '2009年': [0.50],
    '2010年': [9.36],
    '2011年': [34],
    '2012年': [191],
    '2013年': [350],
    '2014年': [571],
    '2015年': [912],
    '2016年': [1207],
    '2017年': [1682],
    '2018年': [2135],
    '2019年': [2684],
    '2020年': [4982],
}

# One row per year: transpose the single-row frame, then move the year labels
# from the index into a regular column.
df = pd.DataFrame(double11_sales).T.reset_index()
df.rename(columns={'index': '年份', 0: '销量'}, inplace=True)
# Ordinal year (2009 -> 1, 2010 -> 2, ...). Each value is wrapped in a
# one-element list so that list(df['年份int']) is already a 2-D sample matrix.
df['年份int'] = [[i] for i in range(1, len(df) + 1)]
df
.dataframetbodytrth{\nvertical-align:top;\n}\n\n\n.dataframetheadth{\ntext-align:right;\n}
NO.02、绘制散点图
利用plotly工具包,将年份对应销售量的散点图绘制出来,可以明显看到2020年的数据立马飙升。
# Scatter plot of the yearly sales
import plotly as py
import plotly.graph_objs as go
import numpy as np

year = df['年份']
sales = df['销量']

# A single markers-only trace: one point per year.
trace = go.Scatter(x=year, y=sales, mode='markers')
data = [trace]

layout = go.Layout(title='2009年-2020年天猫淘宝双十一历年销量')

fig = go.Figure(data=data, layout=layout)
fig.show()
<divid="2b361fe9-adc3-4cbe-810c-f76371d70c59"class="plotly-graph-div"style="height:525px;width:100%;"></div>\n<scripttype="text/javascript">\nrequire(["plotly"],function(Plotly){\nwindow.PLOTLYENV=window.PLOTLYENV||{};\n\n\nif(document.getElementById("2b361fe9-adc3-4cbe-810c-f76371d70c59")){\nPlotly.newPlot(\n'2b361fe9-adc3-4cbe-810c-f76371d70c59',\n[{"mode":"markers","type":"scatter","x":["2009\\u5e74","2010\\u5e74","2011\\u5e74","2012\\u5e74","2013\\u5e74","2014\\u5e74","2015\\u5e74","2016\\u5e74","2017\\u5e74","2018\\u5e74","2019\\u5e74","2020\\u5e74"],"y":[0.5,9.36,34.0,191.0,350.0,571.0,912.0,1207.0,1682.0,2135.0,2684.0,4982.0]}],\n{"template":{"data":{"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5}},"type":"bar"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5}},"type":"barpolar"}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"choropleth":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.888
8888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"histogram"}],"histogram2d":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":[{"marker":{"c
olorbar":{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterternary":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterternary"}],"surface":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]},"colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":{"bgcolor":"white","lakecolor":"white","landco
lor":"#E5ECF6","showlakes":true,"showland":true,"subunitcolor":"white"},"hoverlabel":{"align":"left"},"hovermode":"closest","mapbox":{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"title":{"x":0.05},"xaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"title":{"text":"2009\\u5e74-2020\\u5e74\\u5929\\u732b\\u6dd8\\u5b9d\\u53cc\\u5341\\u4e00\\u5386\\u5e74\\u9500\\u91cf"}},\n{"responsive":true}\n).then(function(){
vargd=document.getElementById('2b361fe9-adc3-4cbe-810c-f76371d70c59');\nvarx=newMutationObserver(function(mutations,observer){{\nvardisplay=window.getComputedStyle(gd).display;\nif(!display||display==='none'){{\nconsole.log([gd,'removed!']);\nPlotly.purge(gd);\nobserver.disconnect();\n}}\n}});\n\n\n//Listenfortheremovalofthefullnotebookcells\nvarnotebookContainer=gd.closest('#notebook-container');\nif(notebookContainer){{\nx.observe(notebookContainer,{childList:true});\n}}\n\n\n//Listenfortheclearingofthecurrentoutputcell\nvaroutputEl=gd.closest('.output');\nif(outputEl){{\nx.observe(outputEl,{childList:true});\n}}
})\n};\n});\n</script>\n</div>
NO.03、引入Scikit-Learn库搭建模型
一元多次线性回归
我们先来回顾一下2009-2019年的数据多么美妙。先只选取2009-2019年的数据:
# Keep every row except the last one, i.e. drop the most recent year.
df_2009_2019 = df.iloc[:-1]
df_2009_2019
.dataframetbodytrth{\nvertical-align:top;\n}\n\n\n.dataframetheadth{\ntext-align:right;\n}
通过以下代码生成二次项数据:
from sklearn.preprocessing import PolynomialFeatures  # polynomial feature expansion
poly_reg = PolynomialFeatures(degree=2)  # highest-order term is x squared
X_ = poly_reg.fit_transform(df_2009_2019['年份int'].tolist())  # 2-D array of expanded features
1.第一行代码引入用于增加一个多次项内容的模块PolynomialFeatures
2.第二行代码设置最高次项为二次项,为生成二次项数据(x平方)做准备
3.第三行代码将原有的X转换为一个新的二维数组X_,该二维数据包含新生成的二次项数据(x平方)和原有的一次项数据(x)
X_的内容为下方代码所示的一个二维数组,其中第一列数据为常数项(其实就是X的0次方),没有特殊含义,对分析结果不会产生影响;第二列数据为原有的一次项数据(x);第三列数据为新生成的二次项数据(x的平方)。
X_  # display the expanded feature matrix
array([[1.,1.,1.],\n[1.,2.,4.],\n[1.,3.,9.],\n[1.,4.,16.],\n[1.,5.,25.],\n[1.,6.,36.],\n[1.,7.,49.],\n[1.,8.,64.],\n[1.,9.,81.],\n[1.,10.,100.],\n[1.,11.,121.]])
from sklearn.linear_model import LinearRegression  # linear regression estimator
regr = LinearRegression()  # model instance, not fitted yet
regr.fit(X_, df_2009_2019['销量'].tolist())  # fit sales against the polynomial features
LinearRegression()
1.第一行代码从Scikit-Learn库引入线性回归的相关模块LinearRegression;
2.第二行代码构造一个初始的线性回归模型并命名为regr;
3.第三行代码用fit()函数完成模型搭建,此时的regr就是一个搭建好的线性回归模型。
NO.04、模型预测
接下来就可以利用搭建好的模型regr来预测数据。假设自变量是12,那么使用predict()函数就能预测出对应的因变量y,代码如下:
# Expand the single prediction input x = 12 with the same polynomial terms.
# transform() reuses the already-fitted poly_reg; prediction inputs should not
# be used to re-fit the transformer.
XX_ = poly_reg.transform([[12]])
XX_
array([[1.,12.,144.]])
# Predict with the fitted model for the expanded input, then display the result.
y=regr.predict(XX_)
y
array([3282.23478788])
这里我们就得到了按照2009-2019年的趋势预测出的2020年结果,即3282亿,但实际却是4982亿。原因就是上文提到的统计口径变了(合并计算了11月1日到11日的销量),金额一下子变大了。绘制成图,就是下面这样:
# Scatter plot: actual sales against the quadratic fit
import plotly as py
import plotly.graph_objs as go
import numpy as np

year = list(df['年份'])
sales = df['销量']

trace1 = go.Scatter(
    x=year,
    y=sales,
    mode='markers',
    name="实际销量",  # legend label of the first trace
)

# Evaluate the fit at exactly the years shown on the x axis, so x and y have
# the same length. transform() reuses the fitted poly_reg instead of
# re-fitting it on the plotting inputs.
XX_ = poly_reg.transform(list(df['年份int']))
# Re-create and re-fit the model on the 2009-2019 features so this cell does
# not depend on whatever regr was last fitted on.
regr = LinearRegression()
regr.fit(X_, list(df_2009_2019['销量']))
trace2 = go.Scatter(
    x=year,
    y=regr.predict(XX_),
    mode='lines',
    name="拟合数据",  # legend label of the second trace
)

data = [trace1, trace2]

layout = go.Layout(title='天猫淘宝双十一历年销量',
                   xaxis_title='年份',
                   yaxis_title='销量')

fig = go.Figure(data=data, layout=layout)
fig.show()
-
vargd=document.getElementById('e8ae9262-7d14-4b38-b661-fb79f13ff6a7');\nvarx=newMutationObserver(function(mutations,observer){{\nvardisplay=window.getComputedStyle(gd).display;\nif(!display||display==='none'){{\nconsole.log([gd,'removed!']);\nPlotly.purge(gd);\nobserver.disconnect();\n}}\n}});\n\n\n//Listenfortheremovalofthefullnotebookcells\nvarnotebookContainer=gd.closest('#notebook-container');\nif(notebookContainer){{\nx.observe(notebookContainer,{childList:true});\n}}\n\n\n//Listenfortheclearingofthecurrentoutputcell\nvaroutputEl=gd.closest('.output');\nif(outputEl){{\nx.observe(outputEl,{childList:true});\n}}
})\n};\n});\n</script>\n</div>
NO.05、预测2021年的销量
既然数据发生了巨大的偏离,咱们也别深究了,就大力出奇迹。同样的方法,把2020年的真实数据纳入进来,二话不说拟合一下,看看会得到什么结果:
from sklearn.preprocessing import PolynomialFeatures

# Degree-5 expansion over every row of df.
poly_reg = PolynomialFeatures(degree=5)
X_ = poly_reg.fit_transform(df['年份int'].tolist())
# Fit a fresh model on all rows of df, i.e. including the 2020 figure.
regr = LinearRegression()
regr.fit(X_, df['销量'].tolist())
LinearRegression()
# Features for every observed year plus one extra step, x = 13 (the year after
# the last row). transform() reuses the fitted poly_reg rather than re-fitting
# it on the prediction inputs.
XXX_ = poly_reg.transform(list(df['年份int']) + [[13]])
# Scatter plot: actual sales, fitted curve and the 2021 forecast
import plotly as py
import plotly.graph_objs as go
import numpy as np

year = list(df['年份'])
sales = df['销量']

# Predict once and reuse the result for both the curve and the forecast marker.
predicted = regr.predict(XXX_)
# x labels for the fitted curve, extended by the forecast year so the labels
# line up with the predictions (assumes XXX_ holds the observed years plus
# one extra row for 2021).
forecast_years = year + ['2021年']

trace1 = go.Scatter(
    x=year,  # one label per actual value
    y=sales,
    mode='markers',
    name="实际销量",  # legend label of the first trace
)

trace2 = go.Scatter(
    x=forecast_years,
    y=predicted,
    mode='lines',
    name="预测销量",  # legend label of the second trace
)

trace3 = go.Scatter(
    x=['2021年'],
    y=[predicted[-1]],  # last prediction is the forecast for 2021
    mode='markers',
    name="2021年预测销量",  # legend label of the third trace
)

data = [trace1, trace2, trace3]

layout = go.Layout(title='天猫淘宝双十一历年销量',
                   xaxis_title='年份',
                   yaxis_title='销量')

fig = go.Figure(data=data, layout=layout)
fig.show()
<divid="3151a044-f334-4544-8e20-b4908350e140"class="plotly-graph-div"style="height:525px;width:100%;"></div>\n<scripttype="text/javascript">\nrequire(["plotly"],function(Plotly){\nwindow.PLOTLYENV=window.PLOTLYENV||{};\n\n\nif(document.getElementById("3151a044-f334-4544-8e20-b4908350e140")){\nPlotly.newPlot(\n'3151a044-f334-4544-8e20-b4908350e140',\n[{"mode":"markers","name":"\\u5b9e\\u9645\\u9500\\u91cf","type":"scatter","x":["2009\\u5e74","2010\\u5e74","2011\\u5e74","2012\\u5e74","2013\\u5e74","2014\\u5e74","2015\\u5e74","2016\\u5e74","2017\\u5e74","2018\\u5e74","2019\\u5e74","2020\\u5e74","2021\\u5e74","2022\\u5e74","2023\\u5e74"],"y":[0.5,9.36,34.0,191.0,350.0,571.0,912.0,1207.0,1682.0,2135.0,2684.0,4982.0]},{"mode":"lines","name":"\\u9884\\u6d4b\\u9500\\u91cf","type":"scatter","x":["2009\\u5e74","2010\\u5e74","2011\\u5e74","2012\\u5e74","2013\\u5e74","2014\\u5e74","2015\\u5e74","2016\\u5e74","2017\\u5e74","2018\\u5e74","2019\\u5e74","2020\\u5e74","2021\\u5e74","2022\\u5e74","2023\\u5e74"],"y":[-31.73938915412782,84.24415467459653,47.98135953421206,98.6884039599804,304.45773756556355,625.1325380574217,975.1811682492444,1286.5716330763207,1571.64603660996,1985.995039071935,2891.332313848736,4918.369004506214,9029.688181803827]},{"mode":"markers","name":"2021\\u5e74\\u9884\\u6d4b\\u9500\\u91cf","type":"scatter","x":["2021\\u5e74"],"y":[9029.688181803827]}],\n{"template":{"data":{"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5}},"type":"bar"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5}},"type":"barpolar"}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"choropleth":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":[{"colorb
ar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"histogram"}],"histogram2d":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"h
istogram2dcontour"}],"mesh3d":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterternary":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterternary"}],"surface":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":[[0.0,"#0d0887"],[0.1111111111111111,
"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]},"colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlakes":true,"showland":true,"subunitcolor":"white"},"hoverlabel":{"align":"left"},"hovermode":"closest","mapbox":{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"title":{"x":0.05},"xaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":{"automarg
in":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"title":{"text":"\\u5929\\u732b\\u6dd8\\u5b9d\\u53cc\\u5341\\u4e00\\u5386\\u5e74\\u9500\\u91cf"},"xaxis":{"title":{"text":"\\u5e74\\u4efd"}},"yaxis":{"title":{"text":"\\u9500\\u91cf"}}},\n{"responsive":true}\n).then(function(){
vargd=document.getElementById('3151a044-f334-4544-8e20-b4908350e140');\nvarx=newMutationObserver(function(mutations,observer){{\nvardisplay=window.getComputedStyle(gd).display;\nif(!display||display==='none'){{\nconsole.log([gd,'removed!']);\nPlotly.purge(gd);\nobserver.disconnect();\n}}\n}});\n\n\n//Listenfortheremovalofthefullnotebookcells\nvarnotebookContainer=gd.closest('#notebook-container');\nif(notebookContainer){{\nx.observe(notebookContainer,{childList:true});\n}}\n\n\n//Listenfortheclearingofthecurrentoutputcell\nvaroutputEl=gd.closest('.output');\nif(outputEl){{\nx.observe(outputEl,{childList:true});\n}}
})\n};\n});\n</script>\n</div>
NO.06、多项式预测的次数到底如何选择
在选择模型中的次数方面,可以通过设置程序,循环计算各个次数下预测误差,然后再根据结果反选参数。
# Work on a copy so df keeps its list-wrapped '年份int' column.
df_new = df.copy()
# Unwrap each cell: take the first element of the value stored in '年份int'.
df_new['年份int'] = [cell[0] for cell in df['年份int']]
df_new
.dataframetbodytrth{\nvertical-align:top;\n}\n\n\n.dataframetheadth{\ntext-align:right;\n}
# Choosing the polynomial degree:
# compute the test MSE of a degree-m polynomial regression for m = 1..m_max
# and plot MSE against m.
import matplotlib.pyplot as plt  # plt is used below but is not imported by any cell shown
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# NOTE(review): the two slices overlap. With 12 rows the training set is rows
# 0-10 and the test set is rows 6-11, so rows 6-10 are scored by a model that
# was trained on them. The boundaries are left unchanged so the MSE values
# stay comparable with the results discussed in the text; use one shared split
# index for a true hold-out evaluation.
train_df = df_new[:int(len(df) * 0.95)]
test_df = df_new[int(len(df) * 0.5):]

# Independent and dependent variables for training and testing.
# The model inputs are reshaped into single-column 2-D arrays.
train_x = train_df['年份int'].values.reshape(-1, 1)
train_y = train_df['销量'].values.reshape(-1, 1)
test_x = test_df['年份int'].values.reshape(-1, 1)
test_y = test_df['销量'].values

m_max = 10  # highest polynomial degree to try
degrees = list(range(1, m_max + 1))
mse = []  # test MSE per degree, in the same order as `degrees`
for m in degrees:
    model = make_pipeline(PolynomialFeatures(m, include_bias=False), LinearRegression())
    model.fit(train_x, train_y)  # train
    pre_y = model.predict(test_x)  # predict on the test slice
    mse.append(mean_squared_error(test_y, pre_y.flatten()))

print("MSE计算结果:", mse)

# Line plus markers: MSE as a function of the degree m.
plt.plot(degrees, mse, 'r')
plt.scatter(degrees, mse)

# Title and axis labels.
plt.title("MSE of m degree of polynomial regression")
plt.xlabel("m")
plt.ylabel("MSE")
MSE计算结果:[1088092.9621201046,481951.27857828484,478840.8575107471,477235.9140442428,484657.87153138855,509758.1526412842,344204.1969956556,429874.9229308078,8281846.231771571,146298201.8473966]
Text(0,0.5,'MSE')
从误差结果可以看到,次数取2到8误差基本稳定,没有明显的减少了,但其实你试试就知道,次数选择3的时候,预测的销量是6213亿元,次数选择5的时候,预测的销量是9029亿元,对于销售量来说,这个范围已经够大的了。我也就斗胆猜到9029亿元,我的胆量也就预测到这里了,破万亿就太夸张了,欢迎胆子大的同学留下你们的预测结果,让我们11月11日,拭目以待吧。
NO.07、总结最后