# mainscntanalysis/secondDbSteps.r

library(DBI)
library(tidyr)
library(dplyr)
library(lubridate)
library(R.utils)
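
# Fetch valid mains-frequency samples between startDate and endDate and return
# them as a wide data frame (one column per location); implausible jumps
# between consecutive samples are masked as NA.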
get_freq_df <- function(con, startDate, endDate) {
  startStr <- strftime(startDate, "%Y-%m-%d %H:%M:%S", tz="UTC")
  endStr <- strftime(endDate, "%Y-%m-%d %H:%M:%S", tz="UTC")
  # fetch valid frequency readings for the requested time window from the database
  res <- dbSendQuery(con, "select time, location, freq from mainsfrequency where valid=1 and time >= $1 and time < $2")
  dbBind(res, list(startStr, endStr))
  frequencies <- dbFetch(res)
  dbClearResult(res)
  # pivot so that each row holds the readings of all locations at one point in time
  freq_wide <- frequencies %>%
    pivot_wider(names_from = location,
                values_from = freq,
                values_fn = mean)
  # mask measurement errors: samples whose jump to the previous sample exceeds THRESHOLD are set to NA
  THRESHOLD <- 0.5
  for (colIdx in 2:length(freq_wide)) {
    last <- freq_wide[[1, colIdx]]
    for (rowIdx in 1:length(freq_wide[[colIdx]])) {
      current <- freq_wide[[rowIdx, colIdx]]
      if (!is.na(current) && !is.na(last) && (abs(current - last) > THRESHOLD)) {
        freq_wide[[rowIdx, colIdx]] <- NA
      }
      last <- current
    }
  }
  return(freq_wide)
}

con <- dbConnect(RPostgres::Postgres(),
                 dbname='mainscnt',
                 host='172.16.10.27',
                 user='wn')
START <- "2021-08-02 00:00:00"
INTERVAL <- 3600   # window length in seconds (one hour)
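
# Sum the absolute deviation of each location from the mean of the other
# locations over 24 consecutive one-hour windows, one result row per window.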
freq_deviation_integrals <- data.frame()
for (offset in 0:23) {
  startDate <- ymd_hms(START) + INTERVAL * offset
  endDate <- startDate + INTERVAL
  # get prepared and sanitized data from the database
  freq_wide <- get_freq_df(con, startDate, endDate)
  # measurement locations are all columns except the leading time column
  location_names <- names(freq_wide)[-1]
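
  # for each location, add a column with the mean of all other locations and
  # a column with the absolute deviation of this location from that mean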
  for (colIdx in 1:length(location_names)) {
    colName.mean <- paste("mean.w.o.", location_names[colIdx], sep="")
    colName.diff <- paste(location_names[colIdx], ".to.mean", sep="")
    freq_wide <- freq_wide %>%
      rowwise() %>%
      mutate(!!colName.mean := mean(c_across(location_names[- colIdx]), na.rm=TRUE)) %>%
      mutate(!!colName.diff := abs(eval(as.name(colName.mean)) - eval(as.name(location_names[colIdx]))) * 100)
  }

  means <- freq_wide %>% select(ends_with(".to.mean"))
  sum.means <- apply(means, 2, sum, na.rm=TRUE)
  #printf("start: %s, end: %s\n", startDate, endDate)
  #print(sum.means)
  #printf("\n")
  next.row.no <- nrow(freq_deviation_integrals) + 1
  freq_deviation_integrals[next.row.no, c(1, 2)] <- c(strftime(startDate, "%Y-%m-%d %H:%M:%S", tz="UTC"),
                                                      strftime(endDate, "%Y-%m-%d %H:%M:%S", tz="UTC"))
  freq_deviation_integrals[next.row.no, c(3:(2 + length(sum.means)))] <- sum.means[order(names(sum.means))]
}

names(freq_deviation_integrals) <- c("startDate", "endDate", sort(location_names))
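
# for each location, add a column with the window-to-window difference of its
# deviation integral (the first window is set to 0)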
for (colIdx in 1:length(location_names)) {
  freq_deviation_integrals[, ncol(freq_deviation_integrals) + 1] <- c(0, diff(freq_deviation_integrals[, 2 + colIdx], 1))
  names(freq_deviation_integrals)[length(location_names) + 2 + colIdx] <- paste("diff", sort(location_names)[colIdx], sep=".")
}

dbDisconnect(con)