r - Efficient comparison of POSIXct in data.table -


Hello, I am looking for an efficient way of selecting the rows of a data.table by a POSIXct column such that the time of day is less than 12:00:00 (note that milliseconds are not required, so ITime can be used, for example).

# Build the benchmark table: 1e7 rows with a single POSIXct column `dts`
# whose values are normally distributed around the Unix epoch
# (1e5 * rnorm(n) seconds), labelled as GMT.
set.seed(1)
n <- 1e7
dt <- data.table(dts = .POSIXct(1e5 * rnorm(n), tz = "GMT"))
dt
#                                  dts
#        1: 1969-12-31 06:35:54.618925
#        2: 1970-01-01 05:06:04.332422
#       ---
#  9999999: 1970-01-03 00:37:00.035565
# 10000000: 1969-12-30 08:30:23.624506

One solution (the problem here is that the cast is costly if n is big):

# Time-of-day filter built on ITime.
#
# t:  vector of date-times (converted with as.ITime()).
# st: start of the window, e.g. "00:00:00" (converted with as.ITime()).
# et: end of the window, e.g. "11:59:59" (converted with as.ITime()).
#
# Returns a logical vector that is TRUE where the time of day of `t` lies
# in the closed interval [st, et]. Converting the whole of `t` to ITime is
# the costly step when `t` is long.
f <- function(t, st, et) {
  time <- as.ITime(t)
  time >= as.ITime(st) & time <= as.ITime(et)
}

# xts-style range filter (geektrader's solution).
#
# t: POSIXct vector to test.
# s: ISO 8601 range string, parsed with .parseISO8601(). Either a time-only
#    window such as "T00:00:00/T11:59:59", or a date/date-time range such as
#    "1970", "19700102" or "19700102 00:00:00/19700103 12:00:00".
#
# Returns a logical vector that is TRUE where `t` falls inside the range
# (both ends inclusive).
p <- function(t, s) {
  # The ISO 8601 time designator is an upper-case "T"; normalise so that a
  # lower-case "t" in `s` is accepted as well.
  s <- toupper(s)
  ep <- .parseISO8601(s)

  hms <- "T[0-9]{2}:[0-9]{2}:[0-9]{2}"
  if (grepl(paste0(hms, "/", hms), s)) {
    # Time-only window: compare seconds since midnight instead of instants.
    first.time <- as.double(ep$first.time)
    # 31449600 = 364 * 86400. Presumably .parseISO8601() anchors a time-only
    # end point on the last day of 1970, so this shifts it back to seconds
    # since midnight -- TODO confirm against the xts documentation.
    last.time <- as.double(ep$last.time) - 31449600
    # Seconds since midnight UTC; this is the local time of day only when
    # `t` is meant to be read in GMT/UTC.
    secofday <- as.double(t) %% 86400
    secofday >= first.time & secofday <= last.time
  } else {
    t >= ep$first.time & t <= ep$last.time
  }
}

Quick performance comparison:

# Time both filters on the full table and check that they select the same
# rows. The timings shown are the ones reported in the original post.
system.time(resf <- dt[f(dts, "00:00:00", "11:59:59")])
#    user  system elapsed
#    1.01    0.28    1.29
system.time(resp <- dt[p(dts, "T00:00:00/T11:59:59")])
#    user  system elapsed
#    0.64    0.13    0.76
identical(resf, resp)
# [1] TRUE

 # xts-style range filter.
 #
 # t: POSIXct vector to test.
 # s: ISO 8601 range string, parsed with .parseISO8601(). Either a time-only
 #    window such as "T00:00:00/T12:00:00", or a date/date-time range.
 #
 # Returns a logical vector that is TRUE where `t` falls inside the range
 # (both ends inclusive).
 p <- function(t, s) {
   # The ISO 8601 time designator is an upper-case "T"; normalise so that a
   # lower-case "t" in `s` is accepted as well.
   s <- toupper(s)
   ep <- .parseISO8601(s)

   hms <- "T[0-9]{2}:[0-9]{2}:[0-9]{2}"
   if (grepl(paste0(hms, "/", hms), s)) {
     # Time-only window: compare seconds since midnight instead of instants.
     first.time <- as.double(ep$first.time)
     # 31449600 = 364 * 86400. Presumably .parseISO8601() anchors a time-only
     # end point on the last day of 1970, so this shifts it back to seconds
     # since midnight -- TODO confirm against the xts documentation.
     last.time <- as.double(ep$last.time) - 31449600
     # Seconds since midnight UTC; this is the local time of day only when
     # `t` is meant to be read in GMT/UTC.
     secofday <- as.double(t) %% 86400
     secofday >= first.time & secofday <= last.time
   } else {
     t >= ep$first.time & t <= ep$last.time
   }
 }

 # ITime-based time-of-day filter: TRUE where the time of day of `t` lies in
 # the closed interval [st, et]. All three arguments go through as.ITime().
 f <- function(t, st, et) {
   time <- as.ITime(t)
   time >= as.ITime(st) & time <= as.ITime(et)
 }

 # Run the session in GMT so printed times match the GMT-labelled column.
 Sys.setenv(TZ = "GMT")
 n <- 1e7
 set.seed(1)
 dt <- data.table(dts = .POSIXct(1e5 * rnorm(n), tz = "GMT"))

 system.time(resp <- dt[p(dts, "T00:00:00/T12:00:00"), ])
 ##   user  system elapsed
 ##   1.11    0.11    1.22

 system.time(resf <- dt[f(dts, "00:00:00", "12:00:00")])
 ##   user  system elapsed
 ##   2.22    0.29    2.51

 resp
 ##                         dts
 ##      1: 1969-12-31 06:35:54
 ##      2: 1970-01-01 05:06:04
 ##      3: 1969-12-31 00:47:17
 ##      4: 1970-01-01 09:09:10
 ##      5: 1969-12-31 01:12:33
 ##     ---
 ##5000672: 1970-01-01 06:08:15
 ##5000673: 1970-01-01 05:02:27
 ##5000674: 1969-12-31 02:25:24
 ##5000675: 1970-01-03 00:37:00
 ##5000676: 1969-12-30 08:30:23

 resf
 ##                         dts
 ##      1: 1969-12-31 06:35:54
 ##      2: 1970-01-01 05:06:04
 ##      3: 1969-12-31 00:47:17
 ##      4: 1970-01-01 09:09:10
 ##      5: 1969-12-31 01:12:33
 ##     ---
 ##5000672: 1970-01-01 06:08:15
 ##5000673: 1970-01-01 05:02:27
 ##5000674: 1969-12-31 02:25:24
 ##5000675: 1970-01-03 00:37:00
 ##5000676: 1969-12-30 08:30:23

 # Check correctness: the latest selected time on each date must not exceed
 # 12:00:00. (The column is named `mindts` but holds max(dts).)
 resp[, list(mindts = max(dts)), by = list(as.Date(dts))]
 ##       as.Date              mindts
 ## 1: 1969-12-31 1969-12-31 12:00:00
 ## 2: 1970-01-01 1970-01-01 12:00:00
 ## 3: 1969-12-29 1969-12-29 12:00:00
 ## 4: 1970-01-02 1970-01-02 12:00:00
 ## 5: 1969-12-30 1969-12-30 12:00:00
 ## 6: 1970-01-03 1970-01-03 12:00:00
 ## 7: 1970-01-04 1970-01-04 11:59:59
 ## 8: 1970-01-05 1970-01-05 11:59:45
 ## 9: 1969-12-28 1969-12-28 12:00:00
 ##10: 1969-12-27 1969-12-27 11:59:21
 ##11: 1970-01-06 1970-01-06 10:53:21
 ##12: 1969-12-26 1969-12-26 10:15:03
 ##13: 1970-01-07 1970-01-07 08:21:55

 resf[, list(mindts = max(dts)), by = list(as.Date(dts))]
 ##       as.Date              mindts
 ## 1: 1969-12-31 1969-12-31 12:00:00
 ## 2: 1970-01-01 1970-01-01 12:00:00
 ## 3: 1969-12-29 1969-12-29 12:00:00
 ## 4: 1970-01-02 1970-01-02 12:00:00
 ## 5: 1969-12-30 1969-12-30 12:00:00
 ## 6: 1970-01-03 1970-01-03 12:00:00
 ## 7: 1970-01-04 1970-01-04 11:59:59
 ## 8: 1970-01-05 1970-01-05 11:59:45
 ## 9: 1969-12-28 1969-12-28 12:00:00
 ##10: 1969-12-27 1969-12-27 11:59:21
 ##11: 1970-01-06 1970-01-06 10:53:21
 ##12: 1969-12-26 1969-12-26 10:15:03
 ##13: 1970-01-07 1970-01-07 08:21:55

Now a demo of the nice xts-style subsetting:

 # xts-style subsetting: a bare year, year-month or date selects that whole
 # period; "from/to" selects a closed date-time range.

 # Everything in 1970.
 dt[p(dts, "1970")]
 ##                         dts
 ##      1: 1970-01-01 05:06:04
 ##      2: 1970-01-02 20:18:48
 ##      3: 1970-01-01 09:09:10
 ##      4: 1970-01-01 13:32:22
 ##      5: 1970-01-01 20:30:32
 ##     ---
 ##5001741: 1970-01-02 15:51:12
 ##5001742: 1970-01-03 01:41:31
 ##5001743: 1970-01-01 06:08:15
 ##5001744: 1970-01-01 05:02:27
 ##5001745: 1970-01-03 00:37:00

 # Everything in January 1970.
 dt[p(dts, "197001")]
 ##                         dts
 ##      1: 1970-01-01 05:06:04
 ##      2: 1970-01-02 20:18:48
 ##      3: 1970-01-01 09:09:10
 ##      4: 1970-01-01 13:32:22
 ##      5: 1970-01-01 20:30:32
 ##     ---
 ##5001741: 1970-01-02 15:51:12
 ##5001742: 1970-01-03 01:41:31
 ##5001743: 1970-01-01 06:08:15
 ##5001744: 1970-01-01 05:02:27
 ##5001745: 1970-01-03 00:37:00

 # A single day.
 dt[p(dts, "19700102")]
 ##                         dts
 ##      1: 1970-01-02 20:18:48
 ##      2: 1970-01-02 17:59:38
 ##      3: 1970-01-02 07:14:53
 ##      4: 1970-01-02 02:13:03
 ##      5: 1970-01-02 01:31:37
 ##     ---
 ##1519426: 1970-01-02 11:25:24
 ##1519427: 1970-01-02 10:00:21
 ##1519428: 1970-01-02 05:21:25
 ##1519429: 1970-01-02 05:11:26
 ##1519430: 1970-01-02 15:51:12

 # An explicit date-time range.
 dt[p(dts, "19700102 00:00:00/19700103 12:00:00")]
 ##                         dts
 ##      1: 1970-01-02 20:18:48
 ##      2: 1970-01-02 17:59:38
 ##      3: 1970-01-02 07:14:53
 ##      4: 1970-01-02 02:13:03
 ##      5: 1970-01-02 01:31:37
 ##     ---
 ##1785762: 1970-01-02 05:21:25
 ##1785763: 1970-01-02 05:11:26
 ##1785764: 1970-01-02 15:51:12
 ##1785765: 1970-01-03 01:41:31
 ##1785766: 1970-01-03 00:37:00

 # Check correctness again: the extremes must sit on the range boundaries.
 dt[p(dts, "19700102 00:00:00/19700103 12:00:00"), max(dts)]
 ##[1] "1970-01-03 12:00:00 GMT"

 dt[p(dts, "19700102 00:00:00/19700103 12:00:00"), min(dts)]
 ##[1] "1970-01-02 00:00:00 GMT"

Comments

Popular posts from this blog

monitor web browser programmatically in Android? -

Shrink a YouTube video to responsive width -

wpf - PdfWriter.GetInstance throws System.NullReferenceException -