larus

An R package designed for working with GSM data. It offers tools for data manipulation, trip identification, and calculation of trip parameters like duration, maximum distance, and path length. Additionally, it supports trip interpolation.

Miriam Lerma
2025-03-07

Intro

There are three key issues in determining the foraging trips in GSMs attached to gulls:

Install

You can install the development version of larus from GitHub with:

# install.packages("devtools")
devtools::install_github("MiriamLL/larus")

Load packages

To github

1. Battery checks

Plot to see changes in battery charge over time.

ggplot(GSM_battery, aes(x=daytime, y=battery_charge)) +
  geom_line() +
  scale_x_datetime(labels = date_format("%b"),date_breaks = "1 month")+
  theme_bw()+
  xlab('')

2. Identify trips

Separating the data by month makes the trips easier to observe.

This_month<-'06'
This_month_text<-'Jun'
Trips_01locs<-GSM_locs %>%
  filter(month==This_month)
nest_central_location<-data.frame(Longitude=-110.33979846296234,Latitude=24.28728834326802)
plot_check(my_locs=Trips_01locs,my_central_location=nest_central_location)

Identify trips

Trips_02outside<-Trips_01locs %>%
  filter(inside=='outside_central')
Previous_params<-data.frame(trip_id=c("trip_00001"))
trip_number_sequence<-continue_trip_sequence(my_previous=Previous_params$trip_id)
[1] "trip_00001"
[1] 1
Trips_03trips<-Trips_02outside %>%
  mutate(num_seq=as.numeric(num_seq))%>%
  mutate(trip_number = (cumsum(c(1L, diff(num_seq)) !=   1L)))%>%
  mutate(trip_number = trip_number +1 + trip_number_sequence)%>%
  mutate(trip_number = stringr::str_pad(trip_number,  5, pad = "0"))%>%
  mutate(trip_number = paste0("trip_", trip_number))
plot_trips(my_locs=Trips_03trips,my_central_location=nest_central_location)

Calculate parameters

Trips_04params<-calculate_params(my_locs=Trips_03trips,
                                 my_daytime='daytime',
                                 my_format=  "%Y-%m-%d %H:%M:%S",
                                 my_units="hours",
                                 my_divider="trip_number",
                                 my_gaps='gaps_min')

Check if values are plausible

range(Trips_04params$duration)
[1]  0.0000 14.4175
hist(Trips_04params$duration)

Trips_05params<-Trips_04params %>%
  mutate(trip_month_id=paste0(This_month_text,"_a_",trip_id))%>%
  mutate(central_location='colony')
Trips_04trips<-Trips_03trips %>%
  mutate(trip_month_id=paste0(This_month_text,"_a_",trip_number))%>%
  mutate(central_location='colony')
compare_notrips(my_params=Trips_05params,my_locs=Trips_04trips)
[1] "There are 156 trips in locations, and 156 in parameters"
[1] "There are 0 trips missing in locations, and 0 in parameters"

3. Reevaluate

knitr::kable(head(Trips_05params %>%
   arrange(-duration)%>%
   select(trip_id,duration),5))
trip_id duration
trip_00136 14.417500
trip_00138 9.833333
trip_00157 8.250000
trip_00087 6.250000
trip_00145 5.503889

Subset locations

Subset the locations using the information in the parameters, then check the plot: the orange triangle should sit on top of the cluster of locations.

reevaluate_tripid<-'trip_00136'
rest_central_location<-data.frame(Longitude=-110.325-0.001,Latitude=24.17-0.013)
Reevaluate_01locs<-subset_reevaluation(my_tripid=reevaluate_tripid,
                                       my_trip=Trips_03trips,
                                       new_central_location=rest_central_location,
                                       old_central_location=nest_central_location)

Identify trips reevaluation

Reevaluate_02trips<-identify_trips_reevaluation(my_trip=Reevaluate_01locs,
                                                my_central_location=rest_central_location,
                                                my_previous_params=Trips_05params$trip_id)
[1] "trip_00157"
[1] 157
[1] "From 1 original trip, the change in central location divided the locations to obtain 18 new trips"
plot_trips(my_locs=Reevaluate_02trips,my_central_location=rest_central_location)

Calculate parameters

Reevaluate_03params<-calculate_params(my_locs=Reevaluate_02trips,
                                      my_daytime='daytime',
                                      my_format=  "%Y-%m-%d %H:%M:%S",
                                      my_units="hours",
                                      my_divider="trip_number",
                                      my_gaps="gaps_min")
Reevaluate_04params<-Reevaluate_03params %>%
  mutate(trip_month_id=paste0(This_month_text,"_b_",trip_id))%>%
  mutate(central_location='south_of_colony')
# Label the reevaluated locations with the same "_b_" suffix and central
# location used for their parameters (Reevaluate_04params), so that the
# locations and parameters tables describe these trips consistently.
Reevaluate_03trips<-Reevaluate_02trips %>%
  mutate(trip_month_id=paste0(This_month_text,"_b_",trip_number))%>%
  mutate(central_location='south_of_colony')

Merge

Params_01params<-rbind(Trips_05params,
                       Reevaluate_04params)
Locs_01trips<-rbind(Trips_04trips,
                    Reevaluate_03trips)
compare_notrips(my_params=Params_01params,my_locs=Locs_01trips)
[1] "There are 174 trips in locations, and 174 in parameters"
[1] "There are 0 trips missing in locations, and 0 in parameters"

Remove reevaluated trips

Params_02params<-Params_01params %>%
  filter(trip_id != reevaluate_tripid)
Locs_02trips<-Locs_01trips %>%
  filter(trip_number != reevaluate_tripid)
compare_notrips(my_params=Params_02params,my_locs=Locs_02trips)
[1] "There are 173 trips in locations, and 173 in parameters"
[1] "There are 0 trips missing in locations, and 0 in parameters"

4. Classify criteria

Classify parameters

Params_00criteria<-classify_params(my_params=Params_02params)
check_trip_criteria(my_params=Params_00criteria)
[1] "From 173 trips: 27.75% were trip_longer_than_30mins and 72.25% were trip_shorter_than_30mins. Trips shorter than 30 minutes are not considered real trips. Remove these trips from analyses. "
Params_01criteria<-Params_00criteria %>%
  dplyr::filter(trip_size == 'trip_longer_than_30mins')

Check resolution

# Check the resolution of the same table that is filtered in the next step.
check_resolution_criteria(my_params=Params_01criteria)
[1] "From 27 trips: 3.7 % (n = 1) were low_resolution_gaps_more_60_mins and 96.3 % (n = 26) were ok_resolution_gaps_less_60_mins. Evaluate if trips with low resolution are to be kept"
Params_02criteria<-Params_01criteria %>%
  dplyr::filter(resolution == 'ok_resolution_gaps_less_60_mins')

Check trip length

check_length_criteria(my_params=Params_02criteria)
[1] "From 48 trips: 100% (n = 48) were shorter_than_24h_keep_centralloc and NA% (n = NA) were NA. Evaluate if trips longer than 24 hrs is because of a change in central location"
Params_03criteria<- Params_02criteria %>%
  dplyr::filter(interpolation == 'gapsless60mins_shorter24hr_canditate_interpolate')

Classify locations

Locs_01class<-classify_locs(Inter_params=Params_03criteria,
                            Inter_locs=Locs_02trips)
Locs_02class<-Locs_01class %>%
  dplyr::filter(trip_size == 'trip_longer_than_30mins')%>%
  dplyr::filter(interpolation == 'gapsless60mins_shorter24hr_canditate_interpolate') %>%
  dplyr::filter(resolution == 'ok_resolution_gaps_less_60_mins')

5. Path length

Trips must have at least three locations. Remove small trips

Path_01locs<-as.data.frame(Locs_02class)
short_trips<-Path_01locs %>%
  group_by(trip_number)%>%
  tally()%>%
  arrange(-n)%>%
  filter(n<3)
length(short_trips$trip_number)
[1] 0

Subset to keep only trips with at least 3 locations

Path_02locs<-Path_01locs %>%
  dplyr::filter(!trip_number %in% unique(short_trips$trip_number))

Calculate distances per trip

Might take some time

Path_03distances<-distances_per_trip(my_df=Path_02locs,
                                   my_divider='trip_number')
beepr::beep(sound=1)

Check if values are plausible

range(Path_03distances$pointsdist_km,na.rm=TRUE)

Calculate parameters

Path_04params<-Path_03distances %>% 
  group_by(trip_number)%>%
  summarise(path_lenght_km=sum(pointsdist_km,na.rm=TRUE))%>%
  mutate(trip_id=trip_number)

Merge

Params_path<-merge(Params_03criteria,
                   Path_04params,
                   by='trip_id')

Check if values are plausible

range(Params_path$path_lenght_km)
[1]  0.03 49.45

Calculate leaving and returning distances

nest_central_location<-data.frame(Longitude=-110.33979846296234,Latitude=24.28728834326802)
Params_leaving_returning<-calculate_leaving_returning(my_locs=Path_03distances,
                                                      my_central_location=nest_central_location)
Params_sumpath<-Params_leaving_returning %>%
  dplyr::rename(trip_id=trip_number)%>%
  dplyr::right_join(Params_path,by='trip_id')

Sum distances

Identify the central location: if the trip starts from the nest (colony), the leaving and returning distances are added to the path length; if the central location is outside the nest, they are not added.

Params_sumpaths<-Params_sumpath %>%
  dplyr::rename(central_location=central_location.x)%>%
  mutate(sum_path_lenght=case_when(central_location == "colony" ~ path_lenght_km + leaving_distance_km + returning_distance_km,
                                                 central_location != "colony" ~ path_lenght_km))
Params_wpath<-Params_sumpaths %>%
  dplyr::select(trip_month_id,central_location,
         path_lenght_km,leaving_distance_km,returning_distance_km,
         sum_path_lenght,
         central_lat,central_lon,
         trip_number,
         #countday,day,season,month,n,
         trip_start,trip_end,
         duration,min_gap,max_gap,
         trip_size,resolution,params_analyses,interpolation
         )

Check if there are outliers

knitr::kable(head(Params_wpath %>%
                    dplyr::arrange(-sum_path_lenght)%>%
                    dplyr::select(trip_month_id,sum_path_lenght),5))
trip_month_id sum_path_lenght
Jun_a_trip_00138 50.81
Jun_a_trip_00145 50.71
Jun_a_trip_00087 43.84
Jun_a_trip_00049 41.89
Jun_a_trip_00015 41.61

Check speed

Path_04speed<-Path_03distances %>%
  mutate(speed_if_directly_flying=pointsdist_km/gaps_min*60)%>%
  select(ID,trip_number,num_seq,
         #countday,season,month,
         daytime,
         Longitude,Latitude,
         #battery_charge,battery_class, gaps_class,
         #central_location,day_or_night,
         #trip_size,resolution,params_analyses,interpolation,
         pointsdist_km,gaps_min,
         #ground.speed,
         speed_if_directly_flying
         )
knitr::kable(head(Path_04speed %>%
  #filter(speed_if_directly_flying > 70)%>%
  arrange(-speed_if_directly_flying)%>%
  select(trip_number,speed_if_directly_flying),5))
trip_number speed_if_directly_flying
trip_00009.13 trip_00009 60.22719
trip_00134.460 trip_00134 49.76471
trip_00017.58 trip_00017 48.42000
trip_00147.756 trip_00147 46.38655
trip_00012.26 trip_00012 46.08000

In case there is a trip exceeding speed:

# Select the trip with the highest apparent speed and plot it for inspection.
exceeds_speed<-'trip_00009'
plot_trips(my_locs=Path_04speed %>%
  filter(trip_number==exceeds_speed),my_central_location=nest_central_location)+
  # speed_if_directly_flying is pointsdist_km / gaps_min * 60, i.e. km/h
  ggtitle('If the speed is >70 km/h, \n remove that trip or reevaluate it')

And remember, gulls do whatever they want.
So remove the trip from the list if it's difficult to find the central location.

Params_wpath<-Params_wpath %>%
  filter(trip_number!=exceeds_speed)%>%
  rename(trip_id=trip_number)
Path_speed<-Path_04speed %>%
  filter(trip_number!=exceeds_speed)
compare_notrips(my_params=Params_wpath,my_locs=Path_speed)
[1] "There are 47 trips in locations, and 47 in parameters"
[1] "There are 0 trips missing in locations, and 0 in parameters"

6. Maxdistance

Calculate maximum distance per trip

Maxdist_params<-calculate_maxdist(my_data = Locs_02class, 
                                    central_location = data.frame(Longitude=-110.34,Latitude=24.28),
                                    divider="trip_number")
knitr::kable(head(Maxdist_params,5))
trip_id maxdist_km
trip_00002 5.15
trip_00009 14.43
trip_00011 14.36
trip_00012 14.26
trip_00015 13.86
# Combine the per-trip parameters with the maximum distances.
# An inner join keeps only trips present in both tables, so the trip that was
# removed from Params_wpath (exceeds_speed) is not brought back with NA
# duration and path length, as right_join() would do because Maxdist_params
# still contains it.
Params_final<-Params_wpath %>%
  inner_join(Maxdist_params, by='trip_id')%>%
  select(trip_id,
         duration,sum_path_lenght,maxdist_km)
knitr::kable(head(Params_final,5))
trip_id duration sum_path_lenght maxdist_km
trip_00002 0.6655556 12.65 5.15
trip_00011 1.6800000 33.75 14.36
trip_00012 2.0000000 33.20 14.26
trip_00015 1.8333333 41.61 13.86
trip_00017 1.4769444 32.60 14.07

7. Interpolate

Interpolate trips

This function interpolates trips to a similar resolution. Here using ‘900 sec’.

Interpolation_trips<-Interpolation_trips

Interpolated_locs<-interpolate_trips(my_df=Interpolation_trips,
                                    interval='900 sec',
                                    column_datetime='daytime',
                                    column_trip='trip_number',
                                    column_lat='Latitude',
                                    column_lon='Longitude',
                                    datetime_format="%Y-%m-%d %H:%M:%S")

Citation

Lerma, M. 2025. R package ‘larus’. https://github.com/MiriamLL/larus