Take-home exercise 4

ISSS608 Visual Analytics Take-home Exercise 4

Jiarui Cui (www.linkedin.com/in/jiarui-cui-482232195), School of Computing and Information Systems (https://scis.smu.edu.sg/)
2022-05-23

Overview

This take-home exercise aims to reveal the daily routines of two selected participants of the city of Engagement, Ohio, USA, using ViSiElse and other appropriate visual analytics methods.

Importing Packages

In this take-home exercise, several R packages will be used, including sf, an R package specially designed to handle geospatial data in simple feature objects. The full list is loaded by the code below.

# Packages required by this exercise. data.table supplies fread(), which the
# data-import step calls. It is listed first so that the lubridate and
# tidyverse functions attached after it take precedence over data.table's
# same-named helpers (e.g. hour(), month()).
packages <- c("data.table", "lubridate", "tidyverse",
              "readr", "tmap", "sf", "sftime",
              "clock", "rmarkdown", "ViSiElse",
              "fastDummies", "ggplot2")

# Install any package that is missing, then attach it. library() raises an
# error if the package still cannot be loaded, so a failed install is not
# silently ignored.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

Importing data

# Locate every activity-log CSV. `pattern` is a regular expression, not a
# shell glob, so match a literal ".csv" extension at the end of the name.
log_files <- list.files(path = "Data/ActivityLogs/",
                        pattern = "\\.csv$",
                        full.names = TRUE)
if (length(log_files) == 0) {
  stop("No .csv files found in Data/ActivityLogs/", call. = FALSE)
}

# Read each file with data.table's fread() and stack the results row-wise.
# The call is namespaced so it resolves whether or not data.table is attached.
logs_fread <- log_files %>%
  map_df(data.table::fread)

# Read the building records; the open option names `location` as the column
# that may hold the geometry.
buildings <- read_sf("Data/Buildings.csv",
                     options = "GEOM_POSSIBLE_NAMES=location")

Data preparation

Create month, date, time, and hour labels from the timestamp, and keep only the records logged in March.

# Tag every log row with its calendar month as an ordered factor
# (Jan < Feb < ... < Dec) derived from the timestamp.
logs_fread <- logs_fread %>%
  mutate(month = factor(month(timestamp),
                        levels = 1:12,
                        labels = month.abb,
                        ordered = TRUE))

# Keep the March rows and derive the calendar date, the clock time as an
# "HH:MM:SS" string, and the hour of day.
# NOTE(review): the filter uses the month only, so March of every year present
# in the logs is kept - confirm the data covers a single March.
logs_March <- logs_fread %>%
  filter(month == "Mar") %>%
  mutate(date = as_date(timestamp),
         time = format(timestamp, format = "%H:%M:%S"),
         hour = hour(timestamp))

Convert the time into a numerical value (the number of minutes since midnight) and extract the logs of participants 4 and 16.

# Extract one participant's rows and express `time` as minutes since midnight.
#   logs: log table with `participantId` and `timestamp` columns.
#   id:   participant id to keep.
# Returns the matching rows with `date` and `time` recomputed from `timestamp`.
participant_log <- function(logs, id) {
  logs %>%
    # Filter first so date/time are only computed for the selected participant.
    filter(participantId == .env$id) %>%
    mutate(date = date(timestamp),
           time = hour(timestamp) * 60 + minute(timestamp))
}

log_participant4 <- participant_log(logs_March, 4)
log_participant16 <- participant_log(logs_March, 16)

Calculate their daily average balance.

# Mean of `availableBalance` per calendar day for one participant's log.
# Returns one row per `date` with the mean in column `Balance`.
daily_mean_balance <- function(log_tbl) {
  by_day <- group_by(log_tbl, date)
  summarize(by_day, Balance = mean(availableBalance))
}

participant4_balance <- daily_mean_balance(log_participant4)

participant16_balance <- daily_mean_balance(log_participant16)

Balance comparison

The line chart below presents the daily average available balance for participants 4 and 16 throughout March 2022.

# Put the two daily-balance tables side by side, matched on date. merge()
# suffixes the clashing `Balance` columns: `.x` is participant 4 (first
# argument) and `.y` is participant 16 (second argument).
balance_comparison <- merge(participant4_balance, participant16_balance,
                            by = "date")

# One line per participant. Axis and legend titles are set explicitly so the
# chart is not labelled with the internal names "Balance.x" and "colour".
ggplot(balance_comparison, aes(x = date)) +
  geom_line(aes(y = Balance.x, colour = "participant 4")) +
  geom_line(aes(y = Balance.y, colour = "participant 16")) +
  labs(x = "Date",
       y = "Daily average available balance",
       colour = "Participant")

Daily routine comparison

# Tile heatmap of hour of day (x) against current mode (y), shaded by the
# number of log records in each cell.
#   mode_counts: table with columns `hour`, `currentMode` and `n`.
#   plot_title:  title shown above the plot.
routine_heatmap <- function(mode_counts, plot_title) {
  tiles <- geom_tile(color = "white",
                     size = 0.1)
  fill_scale <- scale_fill_gradient(name = "Frequencies",
                                    low = "sky blue",
                                    high = "dark blue")
  plot_theme <- theme(axis.ticks = element_blank(),
                      plot.title = element_text(hjust = 0.5),
                      legend.title = element_text(size = 4),
                      legend.text = element_text(size = 3))

  ggplot(mode_counts,
         aes(hour, currentMode,
             fill = n)) +
    tiles +
    coord_equal() +
    fill_scale +
    labs(x = 'Hour',
         y = NULL,
         title = plot_title) +
    plot_theme
}

# Count records per hour and mode for participant 4, dropping incomplete rows.
grouped4 <- na.omit(ungroup(count(log_participant4, hour, currentMode)))
routine_heatmap(grouped4, "Monthly routine for participant 4")

# Same summary and plot for participant 16.
grouped16 <- na.omit(ungroup(count(log_participant16, hour, currentMode)))
routine_heatmap(grouped16, "Monthly routine for participant 16")

# Summarise, per day, the time of day at which a participant was logged in a
# given mode.
#   log:      participant log with `currentMode`, `date` and `time` columns
#             (`time` is minutes since midnight).
#   mode:     value of `currentMode` to keep, e.g. "AtWork".
#   out_name: name of the summary column in the result.
#   agg:      summary function applied to `time` within each day (min or max).
# Returns one row per `date` with a single summary column named `out_name`.
mode_time_by_day <- function(log, mode, out_name, agg) {
  log %>%
    filter(currentMode == .env$mode) %>%
    group_by(date) %>%
    summarize(!!out_name := agg(time))
}

# Participant 4: earliest (min) and latest (max) time logged in each mode.
# NOTE(review): get_home is the earliest and leave_home the latest AtHome
# record of the day, which may not be an actual arrival/departure - confirm
# the intended definition.
get_home_time4 <- mode_time_by_day(log_participant4, "AtHome", "get_home", min)
leave_home_time4 <- mode_time_by_day(log_participant4, "AtHome", "leave_home", max)
start_travel_time4 <- mode_time_by_day(log_participant4, "Transport", "start_travel", min)
end_travel_time4 <- mode_time_by_day(log_participant4, "Transport", "end_travel", max)
start_work_time4 <- mode_time_by_day(log_participant4, "AtWork", "start_work", min)
end_work_time4 <- mode_time_by_day(log_participant4, "AtWork", "end_work", max)
get_restaurant_time4 <- mode_time_by_day(log_participant4, "AtRestaurant", "get_restaurant", min)
leave_restaurant_time4 <- mode_time_by_day(log_participant4, "AtRestaurant", "leave_restaurant", max)
start_recreation_time4 <- mode_time_by_day(log_participant4, "AtRecreation", "start_recreation", min)
end_recreation_time4 <- mode_time_by_day(log_participant4, "AtRecreation", "end_recreation", max)

# Participant 16: the same ten summaries.
get_home_time16 <- mode_time_by_day(log_participant16, "AtHome", "get_home", min)
leave_home_time16 <- mode_time_by_day(log_participant16, "AtHome", "leave_home", max)
start_travel_time16 <- mode_time_by_day(log_participant16, "Transport", "start_travel", min)
end_travel_time16 <- mode_time_by_day(log_participant16, "Transport", "end_travel", max)
start_work_time16 <- mode_time_by_day(log_participant16, "AtWork", "start_work", min)
end_work_time16 <- mode_time_by_day(log_participant16, "AtWork", "end_work", max)
get_restaurant_time16 <- mode_time_by_day(log_participant16, "AtRestaurant", "get_restaurant", min)
leave_restaurant_time16 <- mode_time_by_day(log_participant16, "AtRestaurant", "leave_restaurant", max)
start_recreation_time16 <- mode_time_by_day(log_participant16, "AtRecreation", "start_recreation", min)
end_recreation_time16 <- mode_time_by_day(log_participant16, "AtRecreation", "end_recreation", max)

# Combine the ten per-day summaries into one wide table per participant. A
# full join keeps days on which only some of the modes were logged.
participant4_list <- list(get_home_time4, leave_home_time4,
                          start_travel_time4, end_travel_time4,
                          start_work_time4, end_work_time4,
                          get_restaurant_time4, leave_restaurant_time4,
                          start_recreation_time4, end_recreation_time4)
participant4_routine <- participant4_list %>% reduce(full_join, by = "date")

participant16_list <- list(get_home_time16, leave_home_time16,
                           start_travel_time16, end_travel_time16,
                           start_work_time16, end_work_time16,
                           get_restaurant_time16, leave_restaurant_time16,
                           start_recreation_time16, end_recreation_time16)
participant16_routine <- participant16_list %>% reduce(full_join, by = "date")
# Run ViSiElse with its default arguments on participant 4's routine table
# (one row per date, one column per earliest/latest mode time in minutes).
# NOTE(review): `date` is the first column and presumably serves as the
# subject id that visielse() expects - confirm.
visielse(participant4_routine)

-parameters 
 method           :  global 
 grwithin         :   
 quantity         :  N 
 informer         :  median 
 tests            :  FALSE 
 threshold.test   :  0.01 
 pixel            :  20 
 t_0              :  0 
-MATp             : 10 x 74 sparse Matrix of class "dgCMatrix" 
-L                : 0 x 0  data.frame 
-idsort           : 0 x 0 matrix 
-MATpsup          : 0 x 0 sparse Matrix of class "dgCMatrix" 
-idsup            : length  0 vector 
-Lsup             : 0 x 0  data.frame 
-colvect          : 1 x 1 matrix 
-BZL              : 0 x 0 sparse Matrix of class "dgCMatrix" 
-book             : 10 x 6  ViSibook 
-group            : length  0 factor 
-vect_tps         : length  74 vector 
-testsP           : length  0 vector 
-informers        : 3 x 10 matrix 
# Run ViSiElse with its default arguments on participant 16's routine table
# (one row per date, one column per earliest/latest mode time in minutes).
# NOTE(review): `date` is the first column and presumably serves as the
# subject id that visielse() expects - confirm.
visielse(participant16_routine)

-parameters 
 method           :  global 
 grwithin         :   
 quantity         :  N 
 informer         :  median 
 tests            :  FALSE 
 threshold.test   :  0.01 
 pixel            :  20 
 t_0              :  0 
-MATp             : 10 x 74 sparse Matrix of class "dgCMatrix" 
-L                : 0 x 0  data.frame 
-idsort           : 0 x 0 matrix 
-MATpsup          : 0 x 0 sparse Matrix of class "dgCMatrix" 
-idsup            : length  0 vector 
-Lsup             : 0 x 0  data.frame 
-colvect          : 1 x 1 matrix 
-BZL              : 0 x 0 sparse Matrix of class "dgCMatrix" 
-book             : 10 x 6  ViSibook 
-group            : length  0 factor 
-vect_tps         : length  74 vector 
-testsP           : length  0 vector 
-informers        : 3 x 10 matrix