微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

如何在R中构建特征并将其纳入预测模型,以改善ARIMA、Prophet的预测效果?

如何解决:如何在R中构建特征并将其纳入预测模型,以改善ARIMA、Prophet的预测效果?

以下数据集为期5年,要求预测下一年第一个月的现金变化。我在R中尝试了默认的Auto Arima和Prophet库,但RMSE非常差。我还尝试在按月汇总数据(选定的列)后运行Arima,结果更好一些,但仍然低于标准水平。

'Branch code    company_name    GL_account  GL_account_description  account_id  document_type   name_alpha_explanation  name_remark_explanation address_number  address_name    partition_ledger_year_month date_general_ledger amount_usd
1   ABC 1121    Bank A- USD 128000  BD  Bank Deposit    Rec. fr. XYZ customer   354656  XYZ Company Inc    USD  201807  7/17/2018   5000
1   ABC 1122    Bank B-EURO 129000  FX  Xchange Gain/loss       34545       201808  8/30/2018   25000
2   XYZ 2100    Bank W- USD 750000  CS  S-002-USD-99999 NWS-KE-14-NNAP-05-05-15-CH-Kan  0       201510  10/9/2015   300000
2   XYZ 2100    Bank X - AUD    750100  RC  Bank Deposit    Rec fr WEnergy Ltd  252520  Woodside Energy Ltd                AUD  201902  2/13/2019   430
2   XYZ 2100    Bank W- USD 750000  JZ  JPM 14/09/2015  INTEREST INCOME 0       201509  9/14/2015   35005
3   ZTL 3001    JP Morgan - USD 99501   PK  UK Limited      1   UK Limited          USD 201511  11/25/2015  2219

image of data if difficult to read above

每天自动Arima代码

# Daily auto.arima forecast per account.
#
# Reads the raw transactions, sums amount_usd per account per posting date,
# zero-fills every calendar day between 2015-01-01 and 2019-06-30, fits one
# auto.arima model per account, forecasts the next 31 days and sums those
# 31 point forecasts into one value per account (df_upload).
library(dplyr)
library(lubridate)
library(tidyr)
library(tseries)
library(forecast)

df <- read.csv("transactions2015-June2019.csv")
df <- subset(df, select = c("account_id", "date_general_ledger", "amount_usd"))

# One row per (posting date, account) with the summed amount.
df2 <- aggregate(amount_usd ~ date_general_ledger + account_id, df, sum)

# Full daily calendar used to fill in days without any transaction.
dates <- data.frame(
  date_general_ledger = seq(
    as.Date("2015-01-01"), as.Date("2019-06-30"),
    by = "day"
  )
)
class(dates$date_general_ledger)

# The sample rows show month/day/year strings (e.g. 7/17/2018), hence mdy().
df2$date_general_ledger <- mdy(df2$date_general_ledger)
class(df2$date_general_ledger)

# Expand to every account x every calendar day; days without data become 0.
df3 <- tidyr::complete(
  df2,
  account_id,
  date_general_ledger = dates$date_general_ledger,
  fill = list(amount_usd = 0)
)
df3[is.na(df3)] <- 0
tail(df3)

horizon <- 31
loopvec <- unique(df$account_id)

# Collect one small data frame per account. The account id is stored next to
# its own forecasts, so ids and values cannot drift out of alignment (the
# earlier rep(x, 31) cycled through all ids instead of repeating each one
# 31 times, which paired forecasts with the wrong accounts).
forecasts <- vector("list", length(loopvec))
for (i in seq_along(loopvec)) {
  loopitem <- loopvec[i]
  df4 <- as.data.frame(subset(df3, account_id == loopitem))
  # Only the first two elements of `start` are used by ts(): year and the
  # position within the year.
  x <- ts(df4$amount_usd, start = c(2015, 1), frequency = 365)
  arima1 <- auto.arima(x)
  forecast1 <- forecast(arima1, h = horizon)
  forecasts[[i]] <- data.frame(
    account_id = loopitem,
    amount_usd = as.numeric(forecast1$mean)
  )
}

df_new3 <- do.call(rbind, forecasts)
tail(df_new3)

# Total forecast cash movement over the horizon, one row per account.
df_upload <- aggregate(amount_usd ~ account_id, df_new3, sum)

按日的Prophet代码

# Daily Prophet forecast per account.
#
# NOTE(review): the published listing of this script was garbled (lines were
# dropped and one statement had unbalanced parentheses, so it did not parse).
# The preprocessing below is reconstructed from the daily auto.arima script
# on the same page, and the final aggregate is presumed to mirror it too.
# Confirm against the author's original before relying on it.
library(dplyr)
library(lubridate)
library(tidyr)
library(tseries)
library(forecast)
library(prophet)

df <- read.csv("transactions2015-June2019.csv")
df <- subset(df, select = c("account_id", "date_general_ledger", "amount_usd"))

# One row per (posting date, account) with the summed amount.
df2 <- aggregate(amount_usd ~ date_general_ledger + account_id, df, sum)
df2$date_general_ledger <- mdy(df2$date_general_ledger)

# Full daily calendar (1642 days); with the 31 forecast days this gives the
# 1673 rows per account that the garbled listing referred to.
dates <- data.frame(
  date_general_ledger = seq(
    as.Date("2015-01-01"), as.Date("2019-06-30"),
    by = "day"
  )
)

# Expand to every account x every calendar day; days without data become 0.
df3 <- tidyr::complete(
  df2,
  account_id,
  date_general_ledger = dates$date_general_ledger,
  fill = list(amount_usd = 0)
)
df3[is.na(df3)] <- 0

horizon <- 31
history_end <- as.Date("2019-06-30")
loopvec <- unique(df$account_id)

# One data frame per account; the account id travels with its own forecast
# rows, so no separate id vector has to be kept in step with them.
forecasts <- vector("list", length(loopvec))
for (i in seq_along(loopvec)) {
  loopitem <- loopvec[i]
  df4 <- as.data.frame(subset(df3, account_id == loopitem))
  # prophet() expects the columns to be named ds (date) and y (value).
  df5 <- data.frame(ds = df4$date_general_ledger, y = df4$amount_usd)
  model <- prophet(df5)
  future <- make_future_dataframe(model, periods = horizon, freq = "day")
  prediction <- predict(model, future)
  forecasts[[i]] <- data.frame(
    account_id = loopitem,
    # Convert to Date so the filter below compares two values of one class.
    ds = as.Date(prediction$ds),
    yhat = prediction$yhat
  )
}

df_finalprophet <- do.call(rbind, forecasts)

# Keep only the out-of-sample days (after the last day of history).
df_finalfilter <- subset(df_finalprophet, ds > history_end)
df_finalfilter2 <- data.frame(
  account_id = df_finalfilter$account_id,
  amount_usd = df_finalfilter$yhat
)

# Total forecast cash movement over the horizon, one row per account.
df_upload <- aggregate(amount_usd ~ account_id, df_finalfilter2, sum)

Arima-每月

# Monthly auto.arima forecast per account.
#
# Reads pre-aggregated monthly data, zero-fills months without activity for
# every account, fits one auto.arima model per account, and writes the
# one-month-ahead point forecast per account to submission.csv.
library(dplyr)
library(lubridate)
library(tidyr)
library(tseries)
library(forecast)

df <- read.csv("monthly_data_training.csv")

# partition_ledger_year_month presumably holds YYYYMM values (the sample rows
# show e.g. 201807); appending "01" without a separator yields a YYYYMMDD
# string that ymd() parses as the first day of that month.
df$partition_ledger_year_month <- ymd(
  paste0(df$partition_ledger_year_month, "01")
)

# Every month between the first and last observed month.
all_months <- seq(
  min(df$partition_ledger_year_month, na.rm = TRUE),
  max(df$partition_ledger_year_month, na.rm = TRUE),
  by = "month"
)

# Expand to every account x every month so that months without activity
# become explicit zeros. Completing over the month column alone adds no rows,
# which leaves gaps in the per-account series and shifts them in time.
# NOTE(review): assumes one row per account per month in the input file.
df <- tidyr::complete(
  df,
  account_id,
  partition_ledger_year_month = all_months,
  fill = list(amount_usd = 0)
)

loopvec <- unique(df$account_id)
point_forecasts <- numeric(length(loopvec))

for (i in seq_along(loopvec)) {
  df2 <- subset(df, account_id == loopvec[i])
  # The series is anchored at its end; assumes the data runs through 2019-06.
  x <- ts(df2$amount_usd, end = c(2019, 6), frequency = 12)
  arima1 <- auto.arima(x)
  forecast1 <- forecast(arima1, h = 1)
  point_forecasts[i] <- as.numeric(forecast1$mean)[1]
}

# Accounts and forecasts share the same index, so no reordering is needed.
df_new3 <- data.frame(account_id = loopvec, amount_usd = point_forecasts)

# write.csv() fixes the separator and always writes the header; passing
# `sep` or `col.names` is ignored with a warning, so they are not passed.
write.csv(df_new3, file = "submission.csv", row.names = FALSE)

感谢您的帮助,以优化预测结果。我收到了一些计算趋势,方差或滞后特征的建议,但不确定如何计算并将其合并到上述代码中。

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。