Python数据分析案例24——基于深度学习的锂电池寿命预测

本文介绍: 使用模态分解和随机森林回归，再结合循环神经网络(LSTM,GRU)预测，进行锂电池寿命预测

本期开始案例较为硬核起来了，适合理工科的硕士，人文社科的同学可以看前面的案例。

这篇文章是去年就发了，刊物也印刷了，现在分享一部分代码作为案例给需要的同学。

原文链接（知网文章 C核）：

锂离子电池剩余使用寿命(RUL)是电池健康管理的一个重要指标。本文采用电池容量作为健康状况的指标，使用模态分解和机器学习算法，提出了一种CEEMDAN-RF-SED-LSTM方法去预测锂电池RUL。

import os
import math
import datetime
import random as rn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams ['font.sans-serif'] ='SimHei'               #显示中文
plt.rcParams ['axes.unicode_minus']=False               #显示负号

from PyEMD import EMD,CEEMDAN,Visualisation 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

import tensorflow as tf
import keras
from keras.models import Model, Sequential
from keras.layers import GRU, Dense,Conv1D, MaxPooling1D,GlobalMaxPooling1D,Embedding,Dropout,Flatten,SimpleRNN,LSTM
#from keras.callbacks import EarlyStopping
#from tensorflow.keras import regularizers
#from keras.utils.np_utils import to_categorical
from tensorflow.keras  import optimizers

data0=pd.read_csv('NASA电容量.csv',usecols=['B0006'])
S1 = data0.values
S = S1[:,0]
t = np.arange(0,len(S),1)  
ceemdan=CEEMDAN()
ceemdan.ceemdan(S)
imfs, res = ceemdan.get_imfs_and_residue()
print(len(imfs))
vis = Visualisation()
vis.plot_imfs(imfs=imfs, residue=res, t=t , include_residue=False)

df=pd.DataFrame(imfs.T,columns=['imf'+str(i+1) for i in range(len(imfs))])
df['capacity']=data0.values
X_train=df.iloc[:,:-1]
y_train=df.iloc[:,-1]
model = RandomForestRegressor(n_estimators=5000, max_features=2, random_state=0)
model.fit(X_train, y_train)
model.score(X_train, y_train)

model.feature_importances_
sorted_index = model.feature_importances_.argsort()
plt.barh(range(X_train.shape[1]), model.feature_importances_[sorted_index])
plt.yticks(np.arange(X_train.shape[1]), X_train.columns[sorted_index])
plt.xlabel('Feature Importance')
plt.ylabel('Feature')
plt.title('Random Forest')
plt.tight_layout()

imf_names=X_train.columns[sorted_index][::-1]
imf_weight=model.feature_importances_[sorted_index][::-1]
imf_weight[0]=1
#imf_names,imf_weight

def set_my_seed():
    os.environ['PYTHONHASHSEED'] = '0'
    np.random.seed(1)
    rn.seed(12345)
    tf.random.set_seed(123)
    
def evaluation(y_test, y_predict):
    mae = mean_absolute_error(y_test, y_predict)
    mse = mean_squared_error(y_test, y_predict)
    rmse = math.sqrt(mean_squared_error(y_test, y_predict))
    mape=(abs(y_predict -y_test)/ y_test).mean()
    return mae, rmse, mape

def relative_error(y_test, y_predict, threshold):
    true_re, pred_re = len(y_test), 0
    for i in range(len(y_test)-1):
        if y_test[i] <= threshold >= y_test[i+1]:
            true_re = i - 1
            break
    for i in range(len(y_predict)-1):
        if y_predict[i] <= threshold:
            pred_re = i - 1
            break
    return abs(true_re - pred_re)/true_re

def build_sequences(text, window_size=4):
    #text:list of capacity
    x, y = [],[]
    for i in range(len(text) - window_size):
        sequence = text[i:i+window_size]
        target = text[i+window_size]
        x.append(sequence)
        y.append(target)
    return np.array(x), np.array(y)
def get_traintest(data,train_size=len(data0),window_size=4):
    train=data[:train_size]
    test=data[train_size-window_size:]
    X_train,y_train=build_sequences(train,window_size=window_size)
    X_test,y_test=build_sequences(test)
    return X_train,y_train,X_test,y_test

def build_model(X_train,mode='LSTM',hidden_dim=[32,16]):
    set_my_seed()
    model = Sequential()
    if mode=='RNN':
        #RNN
        model.add(SimpleRNN(hidden_dim[0],return_sequences=True, input_shape=(X_train.shape[-2],X_train.shape[-1])))
        model.add(SimpleRNN(hidden_dim[1]))     
        
    elif mode=='MLP':
        model.add(Dense(hidden_dim[0],activation='relu',input_shape=(X_train.shape[-1],)))
        model.add(Dense(hidden_dim[1],activation='relu'))
        
    elif mode=='LSTM':
        # LSTM
        model.add(LSTM(hidden_dim[0],return_sequences=True, input_shape=(X_train.shape[-2],X_train.shape[-1])))
        model.add(LSTM(hidden_dim[1]))
    elif mode=='GRU':
        #GRU
        model.add(GRU(hidden_dim[0],return_sequences=True, input_shape=(X_train.shape[-2],X_train.shape[-1])))
        model.add(GRU(hidden_dim[1]))
    elif mode=='CNN':
        #一维卷积
        model.add(Conv1D(hidden_dim[0],3,activation='relu',input_shape=(X_train.shape[-2],X_train.shape[-1])))
        model.add(GlobalMaxPooling1D())
        
    model.add(Dense(1))
    model.compile(optimizer='Adam', loss='mse',metrics=[tf.keras.metrics.RootMeanSquaredError(),"mape","mae"])
    return model

def plot_loss(hist,imfname):
    plt.subplots(1,4,figsize=(16,2))
    for i,key in enumerate(hist.history.keys()):
        n=int(str('14')+str(i+1))
        plt.subplot(n)
        plt.plot(hist.history[key], 'k', label=f'Training {key}')
        plt.title(f'{imfname} Training {key}')
        plt.xlabel('Epochs')
        plt.ylabel(key)
        plt.legend()
    plt.tight_layout()
    plt.show()
    
def evaluation_all(df_RFW_eval_all,df_eval_all,mode,Rated_Capacity=2,show_fit=True):
    df_RFW_eval_all['all_pred']=df_RFW_eval_all.iloc[:,1:].sum(axis=1)
    df_eval_all['all_pred']=df_eval_all.iloc[:,1:].sum(axis=1)

    MAE1,RMSE1,MAPE1=evaluation(df_RFW_eval_all['capacity'],df_RFW_eval_all['all_pred'])
    RE1=relative_error(df_RFW_eval_all['capacity'],df_RFW_eval_all['all_pred'],threshold=Rated_Capacity*0.7)

    MAE2,RMSE2,MAPE2=evaluation(df_eval_all['capacity'],df_eval_all['all_pred'])
    RE2=relative_error(df_eval_all['capacity'],df_eval_all['all_pred'],threshold=Rated_Capacity*0.7)

    df_RFW_eval_all.rename(columns={'all_pred':'predict','capacity':'actual'},inplace=True)
    if show_fit:
        df_RFW_eval_all.loc[:,['predict','actual']].plot(figsize=(10,4),title=f'CEEMDAN+RF+{mode}的拟合效果')

    print(f'CEEMDAN+RF+{mode}的效果为mae:{MAE1}, rmse:{RMSE1} ,mape:{MAPE1}, re:{RE1}')
    print(f'CEEMDAN+{mode}的效果为mae:{MAE2}, rmse:{RMSE2} ,mape:{MAPE2}, re:{RE2}')

def train_fuc(mode='LSTM',window_size=8,batch_size=32,epochs=100,hidden_dim=[32,16],Rated_Capacity=2,show_imf=False,show_loss=True,show_fit=True):
    df_RFW_eval_all=pd.DataFrame(df['capacity'])
    df_eval_all=pd.DataFrame(df['capacity'])
    for i,imfname in  enumerate(imf_names):
 
        print(f'正在处理分量信号：{imfname}')
        data=df[imfname]
        X_train,y_train,X_test,y_test=get_traintest(data.values,window_size=window_size,train_size=len(data))
        if mode!='MLP':
            X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
        #print(X_train.shape, y_train.shape)
        start = datetime.datetime.now()
        set_my_seed()
        model=build_model(X_train=X_train,mode=mode,hidden_dim=hidden_dim)
        hist=model.fit(X_train, y_train,batch_size=batch_size,epochs=epochs,verbose=0)
        if show_loss:
            plot_loss(hist,imfname)
        #预测
        point_list = list(data[:window_size].values.copy())
        y_pred=[]
        while (len(point_list)) < len(data.values):
            x = np.reshape(np.array(point_list[-window_size:]), (-1, window_size)).astype(np.float32)
            pred = model.predict(x) 
            next_point = pred[0,0]
            point_list.append(next_point)#加入原来序列用来继续预测下一个点
            #point_list.append(next_point)#保存输出序列最后一个点的预测值
        y_pred.append(point_list)#保存本次预测所有的预测值
        y_pred=np.array(y_pred).T
        #print(y_pred.shape)
        end = datetime.datetime.now()
        
        if show_imf:
            df_eval=pd.DataFrame()
            df_eval['actual']=data.values
            df_eval['pred']=y_pred
        mae, rmse, mape=evaluation(y_test=data.values, y_predict=y_pred)
        print(f'{imfname}该分量的效果：mae:{mae}, rmse:{rmse} ,mape:{mape}')
        df_eval_all[imfname+'_w_pred']=y_pred
        df_RFW_eval_all[imfname+'_w_pred']=y_pred*imf_weight[i]
        print('============================================================================================================================')
    evaluation_all(df_RFW_eval_all,df_eval_all,mode=mode,Rated_Capacity=Rated_Capacity,show_fit=show_fit)
    print(f'running time is {end-start}')

window_size=8
batch_size=16
epochs=100
hidden_dim=[32,16]
Rated_Capacity=2
show_fit=True
show_loss=True
mode='LSTM'  #RNN,GRU,CNN

mode='LSTM' 
set_my_seed()
train_fuc(mode=mode,window_size=window_size,batch_size=batch_size,epochs=epochs,hidden_dim=hidden_dim,Rated_Capacity=Rated_Capacity)

train_fuc(window_size=16)

train_fuc(hidden_dim=[64,32])

mode='RNN' 
set_my_seed()
train_fuc(mode=mode,window_size=window_size,batch_size=32,epochs=epochs,hidden_dim=hidden_dim,Rated_Capacity=Rated_Capacity)

mode='GRU' 
set_my_seed()
train_fuc(mode=mode,window_size=window_size,batch_size=batch_size,epochs=epochs,hidden_dim=hidden_dim,Rated_Capacity=Rated_Capacity)

mode='CNN' 
set_my_seed()
train_fuc(mode=mode,window_size=window_size,batch_size=batch_size,epochs=epochs,hidden_dim=hidden_dim,Rated_Capacity=Rated_Capacity)

mode='MLP' 
set_my_seed()
train_fuc(mode=mode,window_size=window_size,batch_size=batch_size,epochs=90,hidden_dim=hidden_dim,Rated_Capacity=Rated_Capacity)