Postgres set-up
library(DBI)
library(RPostgres)
library(jsonlite)
# Database connection
config <- jsonlite::read_json('C:/Users/brent/Documents/VS_Code/postgres/postgres/config.json')
con <- DBI::dbConnect(
RPostgres::Postgres(),
host = 'localhost',
port = '5432',
dbname = 'mlbench_data',
user = 'postgres',
password = config$pg_password
)
# Tips data to r dataframe
qry <- dbSendQuery(conn = con, statement = "select * from data.tips")
rtips <- dbFetch(qry)
head(rtips)
## total_bill tip sex smoker day time size
## 1 16.99 1.01 Female No Sun Dinner 2
## 2 10.34 1.66 Male No Sun Dinner 3
## 3 21.01 3.50 Male No Sun Dinner 3
## 4 23.68 3.31 Male No Sun Dinner 2
## 5 24.59 3.61 Female No Sun Dinner 4
## 6 25.29 4.71 Male No Sun Dinner 4
select * from data.tips limit 10;
Displaying records 1 - 10
16.99 |
1.01 |
Female |
No |
Sun |
Dinner |
2 |
10.34 |
1.66 |
Male |
No |
Sun |
Dinner |
3 |
21.01 |
3.50 |
Male |
No |
Sun |
Dinner |
3 |
23.68 |
3.31 |
Male |
No |
Sun |
Dinner |
2 |
24.59 |
3.61 |
Female |
No |
Sun |
Dinner |
4 |
25.29 |
4.71 |
Male |
No |
Sun |
Dinner |
4 |
8.77 |
2.00 |
Male |
No |
Sun |
Dinner |
2 |
26.88 |
3.12 |
Male |
No |
Sun |
Dinner |
4 |
15.04 |
1.96 |
Male |
No |
Sun |
Dinner |
2 |
14.78 |
3.23 |
Male |
No |
Sun |
Dinner |
2 |
Python set-up
library('reticulate')
use_condaenv(condaenv = 'STOCK_MASTER', required = TRUE)
import pandas as pd
ptips = r.rtips
ptips
## total_bill tip sex smoker day time size
## 0 16.99 1.01 Female No Sun Dinner 2
## 1 10.34 1.66 Male No Sun Dinner 3
## 2 21.01 3.50 Male No Sun Dinner 3
## 3 23.68 3.31 Male No Sun Dinner 2
## 4 24.59 3.61 Female No Sun Dinner 4
## .. ... ... ... ... ... ... ...
## 239 29.03 5.92 Male No Sat Dinner 3
## 240 27.18 2.00 Female Yes Sat Dinner 2
## 241 22.67 2.00 Male Yes Sat Dinner 2
## 242 17.82 1.75 Male No Sat Dinner 2
## 243 18.78 3.00 Female No Thur Dinner 2
##
## [244 rows x 7 columns]
N largest tips by sex where total bill > X
select * from (
select
t.*
,row_number() over (partition by sex order by tip) as row_num
from data.tips t
where total_bill > 8
) t1
where row_num <= 4
8 records
16.99 |
1.01 |
Female |
No |
Sun |
Dinner |
2 |
1 |
12.90 |
1.10 |
Female |
Yes |
Sat |
Dinner |
2 |
2 |
8.51 |
1.25 |
Female |
No |
Thur |
Lunch |
2 |
3 |
18.64 |
1.36 |
Female |
No |
Thur |
Lunch |
3 |
4 |
12.60 |
1.00 |
Male |
Yes |
Sat |
Dinner |
2 |
1 |
32.83 |
1.17 |
Male |
Yes |
Sat |
Dinner |
2 |
2 |
10.07 |
1.25 |
Male |
No |
Sat |
Dinner |
2 |
3 |
10.51 |
1.25 |
Male |
No |
Sat |
Dinner |
2 |
4 |
(
ptips
.assign(row_num = ptips.query("total_bill > 8").sort_values(["tip"]).groupby(["sex"]).cumcount() + 1)
.query("row_num <= 4")
.sort_values(["sex", "row_num"])
)
## total_bill tip sex smoker day time size row_num
## 0 16.99 1.01 Female No Sun Dinner 2 1.0
## 215 12.90 1.10 Female Yes Sat Dinner 2 2.0
## 135 8.51 1.25 Female No Thur Lunch 2 3.0
## 146 18.64 1.36 Female No Thur Lunch 3 4.0
## 236 12.60 1.00 Male Yes Sat Dinner 2 1.0
## 237 32.83 1.17 Male Yes Sat Dinner 2 2.0
## 75 10.51 1.25 Male No Sat Dinner 2 3.0
## 235 10.07 1.25 Male No Sat Dinner 2 4.0