如何套用已知R代碼尋找資料斷點-有解無憂

實驗獲取了一系列時間序列資料（包含缺失值），通過文獻查找找到一個時間序列分割的方法想用在自己的資料上，但是由于統計基礎薄弱不知如何下手。統計小白，請大神們幫忙看看。（也歡迎熱心大神聯系：QQ：1434493937，非常感謝）
參考文獻：https://www.sciencedirect.com/science/article/pii/S0169809519315972
統計方法R代碼：

N_break_point <- function(serie, n_max = 1, n_period=10,
                          seed=FALSE, auto_select = FALSE,
                          alpha = NULL,method='SNHT',dstr='norm'){
  # select method
  if(exists(x = '.Random.seed')){
    old_random <- .Random.seed
  }
  set.seed(9658)
  if(!is.logical(seed)){
    if(length(seed) != n_max){
      stop('The given seed is not supported. If seed is given must be of length n_max')
    }
  }

  {
    if( method == 'pettit'){
      fun <- pettit
    }else if( method == 'student'){
      fun <- stu
    }else if( method == 'mann-whitney'){
      fun <- man.whi
    }else if( method == 'buishand'){
      fun <- function(x,n_period){
        return(Buishand_R(serie = x,n_period = n_period,dstr = dstr))
      }
    }else if( method == 'SNHT'){
      fun <- function(x,n_period){
        return(SNHT(serie = x,n_period = n_period,dstr = dstr))
      }
    }else{stop('Not supported method')}
  }

  target <- as.vector(serie)
  n_targ <- length(target)
  isna <- as.numeric(is.na(target))
  n_period_2 <- as.integer(n_period)
  ii <- c(rep(0,n_period_2),rollapply(isna,width=n_period+1,sum),rep(0,n_period_2))
  ii <- which(ii >= (n_period_2/2))
  if(length(ii) > 1){
    na_break <- c(ii , n_targ+1)
    ii_aux <- ii[2:length(ii)] - ii[1:(length(ii)-1)]
    ii_aux <- ii_aux < 10
    jump <- c(ii[1]<n_period_2,ii_aux,n_targ+1-ii[length(ii)]<n_period_2)
  }else if(length(ii) == 1){
    na_break <- c(ii , n_targ+1)
    jump <- c(ii<n_period_2,n_targ+1-ii<n_period_2)
  }else{
    na_break <- n_targ+1
    jump <- FALSE
  }
  new_target <- target
  new_n_targ <- n_targ
  output <- list()
  n_max_new <- n_max
  outputcont <- 0
  for(new_serie in 1:length(na_break)){
    n_max <- n_max_new
    if(new_serie == 1){
      if(jump[1]){next}
      ini <- 0
      target <- new_target[1:(na_break[new_serie]-1)]
    }else{
      if(jump[new_serie]){next}
      ini <- na_break[new_serie-1]-1
      target <- new_target[na_break[new_serie-1]:(na_break[new_serie]-1)]
    }
    outputcont <- outputcont +1

    n_targ <- length(target)
    if((n_max+1)*n_period > n_targ-2){
      n_max <- n_targ%/%n_period-1
      if(n_max < 1){
        if(length(na_break) > 1){
          warning(('Not possible to find breakpoints in part of the serie, too short'))
          outputcont <- outputcont - 1; next
        }else{
          stop('Not possible to find breakpoints, target serie too short')
        }
      }
      warning(paste0('the given n is too big for the target and n_period length, ',n_max, ' will be use as maximal amount of breakpoints') )
    }
    output_aux <- list(breaks = list(),p.value=https://bbs.csdn.net/topics/list(),n=list())
    for(n in 1:n_max){
      if(is.logical(seed)){
        breaks <- as.integer(1:n * (n_targ/(n+1)))+1
      }else{
        if(length(seed[[n]])==n){
          breaks <- seed[[n]]
        }else{
          warning(paste('The seed provided at',n,'breaks differs in length equal space break seed will be use instead',sep=' '))
          breaks <- as.integer(1:n * (n_targ/(n+1)))+1
        }
      }
      breaks <- sort(breaks,decreasing = F)
      p <- rep(1,length(breaks))
      breaks_old <- rep(0,length(breaks))
      if(n == 1){
        ff <- fun(target,n_period)
        breaks <- ff$breaks
        p <- ff$p.value
      }else {
        no_problem <- T
        iters <- 0
        breaks_old_old_old <-breaks
        breaks_old_old <-breaks
        p_old_old <- p
        p_old <- p
        while (any(breaks_old != breaks) & no_problem){
          iters <- iters + 1
          breaks_old_old_old <- breaks_old_old
          breaks_old_old <- breaks_old
          breaks_old <- breaks
          p_old_old <- p_old
          p_old <- p
          for(i in 1:n){
            if(i == 1){
              aux <- target[1:(breaks[2]-1)]
              break_aux <- 0
            }else if(i == n){
              aux <- target[breaks[n-1]:n_targ]
              break_aux <- breaks[n-1]-1
            }else{
              aux <- target[breaks[i-1]:(breaks[i+1]-1)]
              break_aux <- breaks[i-1]-1
            }

            ff <- fun(aux,n_period)
            breaks[i] <- ff$breaks + break_aux
            p[i] <- ff$p.value
          }
          if(iters > 3){
            if(all(breaks==breaks_old_old)){
              no_problem <- F
              warning(paste0('several critical point found at n =', n))
              breaks <- NULL
              next
            }else if(all(breaks==breaks_old_old_old)){
              no_problem <- F
              warning(paste0('several critical point found at n =', n))
              breaks <- NULL
              next
            }
          }
        }
      }
      if(is.null(breaks)){
        output_aux$breaks[[n]] <- NA
        output_aux$p.value[[n]] <- 1
        output_aux$n[[n]] <- n
      }else{
        output_aux$breaks[[n]] <- breaks+ini
        output_aux$p.value[[n]] <- p
        output_aux$n[[n]] <- n
      }
    }
    output[[outputcont]] <- output_aux
  }

  if(auto_select){
    output_new <- output
    output <- list(breaks = NULL,p.value=https://bbs.csdn.net/topics/NULL,n=NULL)
    cont <-0
    bb <- NULL
    pp_final <- NULL
    n_final <- 0
    for(output_aux in output_new){
      cont <- cont + 1
      n_max <- length(output_aux$p.value)
      pp <- vector(mode = 'double',length = n_max)
      for(i in 1:n_max){
        pp[i] <- max(output_aux$p.value[[i]])
      }
      if(is.null(alpha)){
        i <- which.min(pp)
      } else{
        aa <- 1:n_max
        i <-max(aa[pp < alpha])
        if(is.infinite(i)){next}
      }
      bb <- c(bb,output_aux$breaks[[i]])
      pp_final <- c(pp_final,output_aux$p.value[[i]])
      n_final <- i + n_final
    }
    output <- list(breaks = bb,p.value=https://bbs.csdn.net/topics/pp_final,n=n_final)
  }

  if(exists(x = '.Random.seed')){
    .Random.seed <- old_random
  }
  return(output)

}

轉載請註明出處，本文鏈接：https://www.uj5u.com/qita/36671.html

標籤：其他技術討論專區

上一篇：深度學習中的影像資料預處理——影像去均值（image mean or pixel mean）

下一篇：ImportError: cannot import name '_path' from 'matplotlib' ，如何解決？