With help from the pandas documentation


Postgres set-up

library(DBI)
library(RPostgres)
library(jsonlite)

# Database connection
config <- jsonlite::read_json('C:/Users/brent/Documents/VS_Code/postgres/postgres/config.json')

con <- DBI::dbConnect(
  RPostgres::Postgres(),
  host      = 'localhost',
  port      = '5432',
  dbname    = 'mlbench_data',
  user      = 'postgres',
  password  = config$pg_password
)
# Tips data to r dataframe
qry <- dbSendQuery(conn = con, statement = "select * from data.tips")
rtips <- dbFetch(qry)
head(rtips)
##   total_bill  tip    sex smoker day   time size
## 1      16.99 1.01 Female     No Sun Dinner    2
## 2      10.34 1.66   Male     No Sun Dinner    3
## 3      21.01 3.50   Male     No Sun Dinner    3
## 4      23.68 3.31   Male     No Sun Dinner    2
## 5      24.59 3.61 Female     No Sun Dinner    4
## 6      25.29 4.71   Male     No Sun Dinner    4


select * from data.tips limit 10;
Displaying records 1 - 10
total_bill tip sex smoker day time size
16.99 1.01 Female No Sun Dinner 2
10.34 1.66 Male No Sun Dinner 3
21.01 3.50 Male No Sun Dinner 3
23.68 3.31 Male No Sun Dinner 2
24.59 3.61 Female No Sun Dinner 4
25.29 4.71 Male No Sun Dinner 4
8.77 2.00 Male No Sun Dinner 2
26.88 3.12 Male No Sun Dinner 4
15.04 1.96 Male No Sun Dinner 2
14.78 3.23 Male No Sun Dinner 2


Python set-up

library('reticulate')
use_condaenv(condaenv = 'STOCK_MASTER', required = TRUE)


import pandas as pd
ptips = r.rtips
ptips
##      total_bill   tip     sex smoker   day    time  size
## 0         16.99  1.01  Female     No   Sun  Dinner     2
## 1         10.34  1.66    Male     No   Sun  Dinner     3
## 2         21.01  3.50    Male     No   Sun  Dinner     3
## 3         23.68  3.31    Male     No   Sun  Dinner     2
## 4         24.59  3.61  Female     No   Sun  Dinner     4
## ..          ...   ...     ...    ...   ...     ...   ...
## 239       29.03  5.92    Male     No   Sat  Dinner     3
## 240       27.18  2.00  Female    Yes   Sat  Dinner     2
## 241       22.67  2.00    Male    Yes   Sat  Dinner     2
## 242       17.82  1.75    Male     No   Sat  Dinner     2
## 243       18.78  3.00  Female     No  Thur  Dinner     2
## 
## [244 rows x 7 columns]


N largest tips by sex where total bill > X

select * from (
    select 
    t.* 
    ,row_number() over (partition by sex order by tip) as row_num
    from data.tips t
    where total_bill > 8
) t1
where row_num <= 4
8 records
total_bill tip sex smoker day time size row_num
16.99 1.01 Female No Sun Dinner 2 1
12.90 1.10 Female Yes Sat Dinner 2 2
8.51 1.25 Female No Thur Lunch 2 3
18.64 1.36 Female No Thur Lunch 3 4
12.60 1.00 Male Yes Sat Dinner 2 1
32.83 1.17 Male Yes Sat Dinner 2 2
10.07 1.25 Male No Sat Dinner 2 3
10.51 1.25 Male No Sat Dinner 2 4
(
    ptips
    .assign(row_num = ptips.query("total_bill > 8").sort_values(["tip"]).groupby(["sex"]).cumcount() + 1)
    .query("row_num <= 4")
    .sort_values(["sex", "row_num"])
)
##      total_bill   tip     sex smoker   day    time  size  row_num
## 0         16.99  1.01  Female     No   Sun  Dinner     2      1.0
## 215       12.90  1.10  Female    Yes   Sat  Dinner     2      2.0
## 135        8.51  1.25  Female     No  Thur   Lunch     2      3.0
## 146       18.64  1.36  Female     No  Thur   Lunch     3      4.0
## 236       12.60  1.00    Male    Yes   Sat  Dinner     2      1.0
## 237       32.83  1.17    Male    Yes   Sat  Dinner     2      2.0
## 75        10.51  1.25    Male     No   Sat  Dinner     2      3.0
## 235       10.07  1.25    Male     No   Sat  Dinner     2      4.0