2016-07-28 3 views
2

アソシエーションルールは、どのイベントが一緒に起こるかを把握する際に非常に一般的な手法です(ハンバーガーとパンはほとんど一緒に販売されます)。マーケティングでは、この技術を使用して無料製品を検索します。インバースアソシエーションルール

私は「代替製品」を抽出し、どのイベントが一緒に起こりにくいかを知るために逆の関連ルールのようなテクニックを探しています。このために、Spark、R、Pythonなどのアルゴリズムやテクニックはありますか?

おかげで、 アミール

答えて

2

私は多分それはあなたを助けることができるR.ためTeng, Hsieh and Chen (2002)を使用して置換ルールマイニングのための非常に実用的な実装を行ってきた:

# Used packages: 
library(arules) 


SRM <- function(TransData, MinSup, MinConf, pMin, pChi, itemLabel, nTID){ 

# Packages ---------------------------------------------------------------- 

if (sum(search() %in% "package:arules") == 0) { 
stop("Please load package arules") 
} 

# Checking Input data ----------------------------------------------------- 
if (missing(TransData)) { 
    stop("Transaction data is missing") 
} 

if (is.numeric(nTID) == FALSE) { 
    stop("nTID has to be one numeric number for the count of  
Transactions") 
} 

    if (length(nTID) > 1) { 
    stop("nTID has to be one number for the count of Transactions") 
    } 

    if (is.character(itemLabel) == FALSE) { 
    stop("itemLabel has to be a character") 
    } 
    # Concrete Item sets --------------------------------------------------- 

    # adding complements to transaction data 
    compl_trans <- addComplement(TransData,labels = itemLabel) 
    compl_tab <- crossTable(compl_trans,"support") 
    compl_tab_D <- as.data.frame(compl_tab) 
    # ordering matrix 
    compl_tab_D <-   compl_tab_D[order(rownames((compl_tab))),order(colnames((compl_tab)))] 


    # Chi Value --------------------------------------------------------------- 


    # empty data frame for loop 

    complement_data <- data.frame(Chi = as.numeric(), 
          Sup_X.Y = as.numeric(), 
          X = as.character(), 
          Sup_X = as.numeric(), 
          Y = as.character(), 
          Sup_Y = as.numeric(), 
          CX = as.character(), 
          SupCX = as.numeric(), 
          CY = as.character(), 
          Sup_CY = as.numeric(), 
          Conf_X.CY = as.numeric(), 
          Sup_X.CY = as.numeric(), 
          Conf_Y.CX = as.numeric(), 
          SupY_CX = as.numeric()) 



    # first loop for one item 
    for (i in 1 : (length(itemLabel) - 1)) { 
    # second loop combines it with all other items 
    for (u in (i + 1) : length(itemLabel)) { 


    # getting chi value from Teng 
    a <- itemLabel[i] 
    b <- itemLabel[u] 
    ca <- paste0("!", itemLabel[i]) 
    cb <- paste0("!", itemLabel[u]) 

    chiValue <- nTID * (
    compl_tab[ca, cb]^2/(compl_tab[ca, ca] * compl_tab[cb, cb]) + 
     compl_tab[ca, b]^2/(compl_tab[ca, ca] * compl_tab[b, b]) + 
     compl_tab[a, cb]^2/(compl_tab[a, a] * compl_tab[cb, cb]) + 
     compl_tab[a, b]^2/(compl_tab[a, a] * compl_tab[b, b]) - 1) 



    # condition to be dependent 
    if (compl_tab[a, b] > compl_tab[a, a] * compl_tab[b, b] &&  chiValue >= qchisq(pChi, 1) && 
     compl_tab[a, a] >= MinSup && compl_tab[b, b] >= MinSup) { 



    chi_sup <- data.frame(Chi = chiValue, 
         Sup_X.Y = compl_tab[a, b], 
         X = a, 
         Sup_X = compl_tab[a, a], 
         Y = b, 
         Sup_Y = compl_tab[b, b], 
         CX = ca, 
         SupCX = compl_tab[ca, ca], 
         CY = cb, 
         Sup_CY = compl_tab[cb, cb], 
         Conf_X.CY = compl_tab[a, cb]/compl_tab[a, a], 
         Sup_X.CY = compl_tab[a, cb], 
         Conf_Y.CX = compl_tab[ca, b]/compl_tab[b, b], 
         SupY_CX = compl_tab[ca, b]) 


    try(complement_data <- rbind(complement_data, chi_sup)) 

    } 


    } 
    } 
    if (nrow(complement_data) == 0) { 
    stop("No complement item sets could have been found") 
    } 


    # changing mode of 
    complement_data$X <- as.character(complement_data$X) 
    complement_data$Y <- as.character(complement_data$Y) 


    # calculating support for concrete itemsets with all others and their complements ------------------- 


    ## with complements 
    matrix_trans <- as.data.frame(as(compl_trans, "matrix")) 

    sup_three <- data.frame(Items = as.character(), 
        Support = as.numeric()) 


    setCompl <- names(matrix_trans) 
    # 1. extracts all other values than that are not in the itemset 
    for (i in 1 : nrow(complement_data)) { 
    value <- setCompl[ !setCompl %in% c(complement_data$X[i], 
            complement_data$Y[i], 
            paste0("!", complement_data$X[i]), 
            paste0("!",complement_data$Y[i]))] 


    # 2. calculation of support 
    for (u in value) { 
    count <- sum(rowSums(matrix_trans[, c(complement_data$X[i],  complement_data$Y[i], u)]) == 3) 
    sup <- count/nTID 
    sup_three_items <- data.frame(Items =  paste0(complement_data$X[i], complement_data$Y[i], u), 
          Support=sup) 
    sup_three <- rbind(sup_three, sup_three_items) 
    } 

    } 

    # Correlation of single items------------------------------------------------------------- 


    # all items of concrete itemsets should be mixed for correlation 
    combis <- unique(c(complement_data$X, complement_data$Y)) 

    # empty object 
    rules<- data.frame(
    Substitute = as.character(), 
    Product = as.character(), 
    Support = as.numeric(), 
    Confidence = as.numeric(), 
    Correlation = as.numeric()) 

    # first loop for one item 
    for (i in 1 : (length(combis) - 1)) { 
    # second loop combines it with all other items 
    for (u in (i + 1) : length(combis)) { 

    first <- combis[i] 
    second <- combis[u] 

    corXY <- (compl_tab[first, second] - (compl_tab[first, first] *  compl_tab[second, second]))/
(sqrt((compl_tab[first, first] * (1 - compl_tab[first,first])) * 
     (compl_tab[second, second] * (1 - compl_tab[second, second])))) 


    # confidence 
    conf1 <- compl_tab[first, paste0("!", second)]/compl_tab[first, first] 
    conf2 <- compl_tab[second, paste0("!", first)]/compl_tab[second, second] 

    two_rules <- data.frame(
    Substitute = c(paste("{", first, "}"), 
       paste("{", second, "}")), 
    Product = c(paste("=>", "{", second, "}"), 
      paste("=>", "{", first, "}")), 
    Support = c(compl_tab[first, paste0("!", second)], compl_tab[second, paste0("!", first)]), 
    Confidence = c(conf1, conf2), 
    Correlation = c(corXY, corXY) 
    ) 

    # conditions 
    try({ 
    if (two_rules$Correlation[1] < pMin) { 
     if (two_rules$Support[1] >= MinSup && two_rules$Confidence[1] >= MinConf) { 
     rules <- rbind(rules, two_rules[1, ]) 
} 
     if (two_rules$Support[2] >= MinSup && two_rules$Confidence[2] >= MinConf) { 
     rules <- rbind(rules, two_rules[2, ]) 
     } 

    } }) 

    } 
    } 


    # Correlation of concrete item pairs with single items -------------------- 
    # adding variable for loop 
    complement_data$XY <- paste0(complement_data$X, complement_data$Y) 

    # combination of items 
    for (i in 1 : nrow(complement_data)){ 

    # set of combinations from dependent items with single items 
    univector <- c(as.vector(unique(complement_data$X)),  as.vector(unique(complement_data$Y))) 
    univector <- univector[!univector %in% c(complement_data$X[i], complement_data$Y[i])] 

    combis <- c(complement_data[i,"XY"], univector) 



    for (u in 2 : length(combis)) { 
    corXYZ <-(sup_three[sup_three$Items == paste0(combis[1], combis[u]),2] - 
       complement_data[complement_data$XY == combis[1],"Sup_X.Y"] * 
      compl_tab[combis[u],combis[u]])/
(sqrt((complement_data[complement_data$XY == combis[1],"Sup_X.Y"] * 
     (1 - complement_data[complement_data$XY == combis[1],"Sup_X.Y"]) * 
     compl_tab[combis[u],combis[u]] * (1 - compl_tab[combis[u],combis[u]])))) 

    dataXYZ <- data.frame(
Substitute = paste("{", combis[1], "}"), 
Product = paste("=>", "{", combis[u], "}"), 
Support = sup_three[sup_three$Items == paste0(combis[1], "!", combis[u]),2], 
Confidence = sup_three[sup_three$Items == paste0(combis[1], "!", combis[u]),2]/
complement_data[complement_data$XY == combis[1],"Sup_X.Y"], 
Correlation = corXYZ) 


    # conditions 
    if (dataXYZ$Correlation < pMin && dataXYZ$Support >= MinSup && dataXYZ$Confidence >= MinConf) { 

    try(rules <- rbind(rules, dataXYZ)) 
    } 
    } 
    } 
    if (nrow(rules) == 0) { 
    message("Sorry no rules could have been calculated. Maybe change input conditions.") 
    }  else { 
    return(rules) 
    } 

    # end 
} 

私はより良い説明がしてあると思います私のブログ: http://mattimeyer.github.io/2016-12-21-Substitution-Rule-Mining/

+0

これはすごい人です。 – JEquihua