gravatar

danielkim

danielkim

Recently Published

Plot
importance
decision tree
naive Bayes
gbm
rpart
prp(rpartmod)
printcp(rpartmod)
rpart
★public_cat2
공공조달세분류(자체) public_cat2
public_cat1
공공조달중분류 public_cat1
cat2
대표세부물품분류명 cat2
cat1
대표물품분류명 cat1
price
예정가격 price
★client_region
수요기관지역 client_region
★client_kind
수요기관구분 client_kind
★client_group
기관 그룹화 client_group
naive Bayes
random forest
gbm
prp(rpartmod)
plotcp(rpartmod)
rpart.plot(rpartmod)
single tree
randomForest_varImp
prp
rpart.plot
decision tree 4
결측치
결측값
mj_paracoord_in
mj_graph for 13 rules_in
mj_scatterplot_in
mj_paracoord
mj_graph for 10 rules
mj_scatterplot
Neural Network
decision tree 3
random forest 3
naive Bayes3
vist_lst_months
vist_lst_months가 12개월부터는 휴면회원 비율이 높다. 당연한 결과임 > 일부 회원은 다시 갱신된 회원인가? 따라서 vist_lst_months는 X 변수에서 제외되어야 할 것 같다. 물론 vist_lst_months가 크면 이미 휴면 상태이긴 하지만, 실제 현장에서는 12개월 이내의 고객에 대해서 예측해야 하기 때문이다.
random forest
gbm (gradient boosting machine)
decision tree
naive Bayes
grid_nb <- expand.grid(fL = c(0, 1), usekernel = c(FALSE, TRUE), adjust = c(0, 1)) set.seed(1234) m_nb <- train(Class ~ ., data = train, method = "nb", preProcess = preProc, trControl = control, tuneGrid = grid_nb)
gbm (gradient boosting machine)
grid_gbm <- expand.grid( interaction.depth = c(5, 10), n.trees = c(100, 300), shrinkage = c(0.1, 0.01), n.minobsinnode = 30) set.seed(1234) m_gbm <- train(Class ~ ., data = train, method = "gbm", preProcess = preProc, trControl = control, tuneGrid = grid_gbm, verbose = F)
random forest
single tree : rpart
m_rp <- train(x = my_x, y = my_y, method = "rpart", #metric = "ROC", preProcess = preProc, trControl = control, tuneGrid = grid_rpart)
visit_lst_dt
order2 <- order[ -which(order$visit_lst_dt == '0001-01-01 BC'), ] 이상치 제거
visit_lst_dt2
## 0001-01-01 280개 존재 ## value가 이상하게 분포함
neural network plot
ROC2
Logistic Regression(로지스틱 회귀분석) 종속변수 mmbr_yn 회원/휴면 값
age_na값처리
ROC (Receiver Operating Characteristic curve)
Logistic Regression(로지스틱 회귀분석) 종속변수 변경 휴면 : 0 회원 : 1
point_rate
kakao_rate
phone_rate
paynow_rate
account_rate
card_rate
tel_rate
pc_rate
mob_rate
use_cpn_cnt
prod_cnt_per_order
order_cnt
avg_order_prd
grade2
mail_yn
sms_yn
as.factor(order$join_dt)
join_dt3
as.character(order$join_dt)
join_dt2
age2
sex2
visit_cnt histogram
qplot(visit_cnt, data=order, geom="histogram", bins = 10, na.rm = T)
GRADE
AGE
VISIT_LST_DT
VISIT_CNT
JOIN_DT
RFND_YN
VISIT_TYPE
PAY_TYPE
g <- ggplot(pro.3, aes(PAY_TYPE)) plot1 <- g + geom_bar(aes(fill = MMBR_YN)) plot2 <- g + geom_bar(aes(fill = MMBR_YN), position = "fill") grid.arrange(plot1, plot2, nrow=2, ncol=1)
SEX