2016-12-05 6 views

matplotlib を使って、要素の出現頻度の棒グラフ(bar chart)を作成しようとしています。そのためには、フラグのリストに含まれる各値が pandas データフレームの列に何回出現するかを数えられる必要があります。以下は、私のノートブック/データにあるコードの大まかなスケッチです。なお、たった今、後述の方法を思いつきました。(タイトル: リストに対する pandas データフレームの出現数をカウントする)

# Rough sketch from the question: count how often each flag occurs in the data.
# list of filtered values 
    filtered = [200, 201, 201, 201, 201, 201, 
    211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 
    237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 
    237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 
    237, 237, 237, 237, 237, 237, 237, 237, 250, 250, 250, 250, 250, 250, 250, 
    250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 
    250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 
    250, 250, 250, 250, 254] 

    # list of flags to use for filtering 
    # (239 and 255 do not appear in the sample list above)
    flags = [200, 201, 211, 237, 239, 250, 254, 255] 
    # this was just a line to code for testing 
    # every flag starts with a count of zero
    flags_dict = {200:0,201:0,211:0,237:0,239:0,250:0,254:0,255:0} 

    # NOTE(review): in this sketch `filtered` is a plain Python list, which has
    # no value_counts() method; the call below presumably stands in for the
    # real pandas object -- wrap the list with pd.Series(...) to run it as-is.
    freq = filtered.value_counts() 

    Expected flags_dict: 
    200: 1 
    201: 5 
    211: 14 
    237: 38 
    239: 0 
    250: 41 
    254: 1 
    255: 0 

    These are the values from the real dataframe but they do not take into 
    account the other flags in the flags list 
    250.0 7682 
    211.0 3734 
    200.0 1483 
    239.0  180 
    201.0  34  




 #column_data is a list created from a pandas Dataframe column 
     column_data = list(filtered['C5 Terra']) 
     flags_dict[200] = column_data.count(200) 
     flags_dict[201] = column_data.count(201) 
     flags_dict[211] = column_data.count(211) 
     flags_dict[237] = column_data.count(237) 
     flags_dict[239] = column_data.count(239) 
     flags_dict[250] = column_data.count(250) 
     flags_dict[254] = column_data.count(254) 
     flags_dict[255] = column_data.count(255) 


import pandas as pd 

# Sample data: one entry per observation of a flag value.
filtered = [200, 201, 201, 201, 201, 201, 211, 211, 211, 211, 211, 211, 211, 211, 211,
            211, 211, 211, 211, 211,
            237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237,
            237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237,
            237, 237, 237, 237, 237, 237, 237, 237, 250, 250, 250, 250, 250, 250, 250,
            250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250,
            250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250,
            250, 250, 250, 250, 254]

# Wrap the list in a Series so pandas can tally the values.
filtered = pd.Series(filtered)

# Number of occurrences of each value that is actually present in the data.
freq = filtered.value_counts(sort=False)

# Every flag that must be reported, including flags that never occur.
flags = [200, 201, 211, 237, 239, 250, 254, 255]

# Map each flag to its count. Indexing freq[flag] raises KeyError for a flag
# that is absent from the data (239 and 255 here), so use Series.get with a
# default of 0 instead; int() turns the NumPy integer into a plain Python int.
flags_dict = {}
for flag in flags:
    flags_dict[flag] = int(freq.get(flag, 0))




In [1]: filtered[filtered.isin(flags)].value_counts().reindex(flags, fill_value=0) 
Out[1]: 200  1 
     201  5 
     211 14 
     237 38 
     239  0 
     250 41 
     254  1 
     255  0 
     dtype: int64 


In [2]: filtered[filtered.isin(flags)].value_counts().reindex(flags, fill_value=0).to_dict() 

Out[2]: {200: 1, 201: 5, 211: 14, 237: 38, 239: 0, 250: 41, 254: 1, 255: 0} 