matlab_to_df.R

Class: Script

Description: Converts the Matlab-based procTracks structure into an R-based dataframe.

Note that variable names may need to be edited in this script to match your version of procTracks.

If you are using the Matlab-based version of FAST, you will also need to re-save your track data in v6 .mat format for this script to work. Input the following commands to the Matlab command line to do this:

clear all
load('C:/Path/To/Target/Mat/Tracks.mat')
save('C:/Path/To/Resaved/Mat/Tracks.mat','procTracks','rawFromMappings','rawToMappings','rawTracks',...
    'trackTimes','toMappings','fromMappings','trackSettings','trackableData','linkStats','-v6')

Track .mat files for the stand-alone version of FAST are natively output in -v6 format.

Dependencies:

Author: Elisa Granato

matlab_to_df.R

# matlab_to_df.R, (c) Elisa Granato, 2019
 
###############################
# reading file from matlab
 
#!!! important: file has to be saved from Matlab with "save -v6" !!
 
library(R.matlab)
library(data.table)
tracks <- R.matlab::readMat(file.choose())
proc <- tracks$procTracks
proc_sub <- proc[,1,]
 
colnames(proc_sub) <- paste("cell",seq(ncol(proc_sub)))
 
output_collect <- list()
for(i in seq(ncol(proc_sub))){
  tmp <- proc_sub[,i]
  tmp <- lapply(tmp, as.numeric)
 
  tmp2 <- unlist(tmp)
  tmp_dt <- data.table(
    value = tmp2,
    observation = names(tmp2),
    cell = colnames(proc_sub)[i]
  )
 
  output_collect[[i]] <- tmp_dt
}
 
clean_dat <- rbindlist(output_collect, fill = T)
clean_dat <- as.data.frame(clean_dat)
 
#############################
 
#rename channel1, 2, 3 by fluorophore to remove non-frame numbers from those labels
 
clean_dat <- data.frame(lapply(clean_dat, function(x) {gsub("channel.1", "bf", x)}))
clean_dat <- data.frame(lapply(clean_dat, function(x) {gsub("channel.2", "gfp", x)}))
clean_dat <- data.frame(lapply(clean_dat, function(x) {gsub("channel.3", "pi", x)}))
 
clean_dat$value <- as.character(clean_dat$value)
clean_dat$value <- as.numeric(clean_dat$value)
clean_dat$cell <- as.character(clean_dat$cell)
clean_dat$observation <- as.character(clean_dat$observation)
 
 
#############################
 
 
# copy dataframe before major data wrangling
 
all.tracks <-  clean_dat
setDT(all.tracks)
 
#############################
 
#reshaping the data to have column names with variables
 
# EDIT VARIABLE NAMES HERE
# Most variable names correspond to the fields of procTracks. Channel names should be written in the order of their indices (e.g. in the below case, channel 1 = brightfield, channel 2 = GFP, channel 3 = Pi)
 
variables <- c("x","y","smoothx","smoothy","theta","vmag","smoothTheta","smoothVmag","majorLen","minorLen","area","phi","bf.mean","gfp.mean","pi.mean","bf.std","gfp.std","pi.std")
leftovers <- c("start","end","length")
 
# all observation variables "x, y," etc) will be turned into columns.
# times, start, end, and length need to be transformed into a new "frame.real" column, which denotes the absolute frame number where each measurement was taken.
# "frame.real" is calculated for each measurement: start (for the cell measured) + relative.frame (the number tacked onto the measurement variables at the moment) minus 1
 
#separate the factors from the frames
all.tracks[
    , observation_cat := gsub(pattern = "[0-9]", replacement = "", ignore.case = T,x = observation)][
    , observation_frame := gsub(pattern = "[A-Z.]", replacement = "", ignore.case = T,x = observation)][
    , observation_frame := as.numeric(observation_frame)]
 
#pull out the start, length, end stuff
lse.tracks <- all.tracks[observation %in% leftovers] 
other.tracks <- all.tracks[!observation %in% leftovers] 
 
 
#now do some calculations:
 
other.tracks[ , frame.real := lse.tracks[observation == "start"][match(other.tracks$cell,cell)]$value + observation_frame -1 ]
 
other.tracks_sub <- other.tracks[, c("cell","value","frame.real","observation_cat"), with = F]
 
#generate final dataframe
 
final_df <- dcast(other.tracks_sub, formula = cell+frame.real ~ observation_cat, value.var = "value")
 
#remove redundant "times" column
final_df <- subset(final_df, select=-c(times))