AWS redshift is a MPP database in AWS. So working with redshift should be very similar with working with Teradata. Since this is MPP, we need to pay more attention on index and join operation, because those operations are performed on individual processor level then aggregated.
To use redshift, we need to create roles in IAM management. details see: (http://docs.aws.amazon.com/redshift/latest/gsg/rs-gsg-create-an-iam-role.html)
Normally we upload flat datafile to AWS S3, then create tables in redshift. Here are Python codes using psycopg2 and sqlalchemy packages. Key thing is to figure out thee connection string, and engine
import pandas as pd
from sqlalchemy import *
import psycopg2
connenction_string = "dbname='dev' port='5439' user='XXXX' password='XXXX' host='XXXX-redshift.XXXX.us-east-1.redshift.amazonaws.com'"
conn = psycopg2.connect(connenction_string)
cur = conn.cursor()
cur.execute(
'''create table lc_table(
id decimal(18,4) not null distkey sortkey,
member_id decimal(18,4) ,
loan_amnt decimal(18,4) ,
funded_amnt decimal(18,4) ,
funded_amnt_inv decimal(18,4) ,
term varchar(50) ,
int_rate varchar(50) ,
installment decimal(18,4) ,
grade varchar(50) ,
sub_grade varchar(50) ,
emp_length varchar(50) ,
home_ownership varchar(50) ,
annual_inc decimal(18,4) ,
verification_status varchar(50) ,
issue_d varchar(50) ,
loan_status varchar(200) ,
pymnt_plan varchar(200) ,
url varchar(500) ,
purpose varchar(500) ,
zip_code varchar(50) ,
addr_state varchar(50) ,
dti decimal(18,4) ,
delinq_2yrs decimal(18,4) ,
earliest_cr_line varchar(50) ,
fico_range_low decimal(18,4) ,
fico_range_high decimal(18,4) ,
inq_last_6mths decimal(18,4) ,
mths_since_last_delinq decimal(18,4) ,
mths_since_last_record decimal(18,4) ,
open_acc decimal(18,4) ,
pub_rec decimal(18,4) ,
revol_bal decimal(18,4) ,
revol_util varchar(50) ,
total_acc decimal(18,4) ,
initial_list_status varchar(50) ,
out_prncp decimal(18,4) ,
out_prncp_inv decimal(18,4) ,
total_pymnt decimal(18,4) ,
total_pymnt_inv decimal(18,4) ,
total_rec_prncp decimal(18,4) ,
total_rec_int decimal(18,4) ,
total_rec_late_fee decimal(18,4) ,
recoveries decimal(18,4) ,
collection_recovery_fee decimal(18,4) ,
last_pymnt_d varchar(50) ,
last_pymnt_amnt decimal(18,4) ,
next_pymnt_d varchar(50) ,
last_credit_pull_d varchar(50) ,
last_fico_range_high decimal(18,4) ,
last_fico_range_low decimal(18,4) ,
collections_12_mths_ex_med decimal(18,4) ,
mths_since_last_major_derog decimal(18,4) ,
policy_code varchar(200) ,
application_type varchar(200) ,
annual_inc_joint decimal(18,4) ,
dti_joint decimal(18,4) ,
verification_status_joint varchar(200) ,
acc_now_delinq decimal(18,4) ,
tot_coll_amt decimal(18,4) ,
tot_cur_bal decimal(18,4) ,
open_acc_6m decimal(18,4) ,
open_il_6m decimal(18,4) ,
open_il_12m decimal(18,4) ,
open_il_24m decimal(18,4) ,
mths_since_rcnt_il decimal(18,4) ,
total_bal_il decimal(18,4) ,
il_util decimal(18,4) ,
open_rv_12m decimal(18,4) ,
open_rv_24m decimal(18,4) ,
max_bal_bc decimal(18,4) ,
all_util decimal(18,4) ,
total_rev_hi_lim decimal(18,4) ,
inq_fi decimal(18,4) ,
total_cu_tl decimal(18,4) ,
inq_last_12m decimal(18,4) ,
acc_open_past_24mths decimal(18,4) ,
avg_cur_bal decimal(18,4) ,
bc_open_to_buy decimal(18,4) ,
bc_util decimal(18,4) ,
chargeoff_within_12_mths decimal(18,4) ,
delinq_amnt decimal(18,4) ,
mo_sin_old_il_acct decimal(18,4) ,
mo_sin_old_rev_tl_op decimal(18,4) ,
mo_sin_rcnt_rev_tl_op decimal(18,4) ,
mo_sin_rcnt_tl decimal(18,4) ,
mort_acc decimal(18,4) ,
mths_since_recent_bc decimal(18,4) ,
mths_since_recent_bc_dlq decimal(18,4) ,
mths_since_recent_inq decimal(18,4) ,
mths_since_recent_revol_delinq decimal(18,4) ,
num_accts_ever_120_pd decimal(18,4) ,
num_actv_bc_tl decimal(18,4) ,
num_actv_rev_tl decimal(18,4) ,
num_bc_sats decimal(18,4) ,
num_bc_tl decimal(18,4) ,
num_il_tl decimal(18,4) ,
num_op_rev_tl decimal(18,4) ,
num_rev_accts decimal(18,4) ,
num_rev_tl_bal_gt_0 decimal(18,4) ,
num_sats decimal(18,4) ,
num_tl_120dpd_2m decimal(18,4) ,
num_tl_30dpd decimal(18,4) ,
num_tl_90g_dpd_24m decimal(18,4) ,
num_tl_op_past_12m decimal(18,4) ,
pct_tl_nvr_dlq decimal(18,4) ,
percent_bc_gt_75 decimal(18,4) ,
pub_rec_bankruptcies decimal(18,4) ,
tax_liens decimal(18,4) ,
tot_hi_cred_lim decimal(18,4) ,
total_bal_ex_mort decimal(18,4) ,
total_bc_limit decimal(18,4) ,
total_il_high_credit_limit decimal(18,4)
)
''')
conn.commit()
cur = conn.cursor()
cur.execute('''copy lc_table from 's3://lc-analysis/final_lc_data.txt'
credentials 'aws_iam_role=arn:aws:iam::XXXXX:role/redshift_upload'
delimiter '\t' region 'us-east-1';
''')
conn.commit()
engine=create_engine('redshift+psycopg2://XXXX:XXXX@XXXX-redshift.XXXX.us-east-1.redshift.amazonaws.com:5439/dev')
query='''
select * from lc_table where id=4555054;
'''
out=pd.read_sql_query(query, engine)