2017-11-23 36 views

興味のある場所に関する情報を含むJSONファイル 'data.json' があります。レコードごとに長さの異なるネストされたJSONを、Rでデータフレームに変換したいと考えています。

# Read "data.json" line by line and parse each line as its own JSON document,
# giving a list with one parsed record per line.
# NOTE(review): fromJSON is assumed to come from an already attached JSON
# package (the matrix-shaped fields in the dump suggest jsonlite) - confirm.
data <- lapply(readLines("data.json"), fromJSON)


list(structure(list(payload = structure(list(existence_full = 1L, 
    geo_virtual = "[\"56.9459720|-2.1971226|20|within_50m|4\"]", 
    latitude = "56.945972", locality = "Stonehaven", `_records_touched` = "{\"crawl\":8,\"lssi\":0,\"polygon_centroid\":0,\"geocoder\":0,\"user_submission\":0,\"tdc\":0,\"gov\":0}", 
    address = "The Lodge, Dunottar", email = "[email protected]", 
    existence_ml = 0.569423821765872, domain_aggregate = "", 
    name = "Dunnottar Castle", search_tags = c("Dunnottar Castle Aberdeenshire", 
    "Dunotter Castle"), admin_region = "Scotland", existence = 1L, 
    category_labels = structure(c("Landmarks", "Buildings and Structures" 
    ), .Dim = 1:2), post_town = "Stonehaven", region = "Kincardineshire", 
    review_count = "719", geocode_level = "within_50m", tel = "01569 762173", 
    placerank = 65L, longitude = "-2.197123", placerank_ml = 37.2791607346447, 
    fax = "01330 860325", category_ids_text_search = "", website = "http://www.dunnottarcastle.co.uk", 
    status = "1", geocode_confidence = "20", postcode = "AB39 2TL", 
    category_ids = 108L, country = "gb", `_geocode_quality` = "4"), .Names = c("existence_full", 
"geo_virtual", "latitude", "locality", "_records_touched", "address", 
"email", "existence_ml", "domain_aggregate", "name", "search_tags", 
"admin_region", "existence", "category_labels", "post_town", 
"region", "review_count", "geocode_level", "tel", "placerank", 
"longitude", "placerank_ml", "fax", "category_ids_text_search", 
"website", "status", "geocode_confidence", "postcode", "category_ids", 
"country", "_geocode_quality")), uuid = "3867aaf3-12ab-434f-b12b-5d627b3359c3"), .Names = c("payload", 
"uuid")), structure(list(payload = structure(list(existence_full = 1L, 
    geo_virtual = "[\"56.237480|-5.073578|20|within_50m|4\"]", 
    latitude = "56.237480", locality = "Inveraray", `_records_touched` = "{\"crawl\":11,\"lssi\":0,\"polygon_centroid\":0,\"geocoder\":0,\"user_submission\":0,\"tdc\":0,\"gov\":0}", 
    address = "Cherry Park", email = "enqu[email protected]", 
    longitude = "-5.073578", domain_aggregate = "", name = "Inveraray Castle", 
    admin_region = "Scotland", search_tags = c("Inveraray Castle Tea Room", 
    "Inverary Castle"), existence = 1L, category_labels = structure(c("Social", 
    "Food and Dining", "Restaurants"), .Dim = c(1L, 3L)), region = "Argyll", 
    review_count = "532", geocode_level = "within_50m", tel = "01499 302203", 
    placerank = 67L, post_town = "Inveraray", placerank_ml = 41.1997808735227, 
    fax = "01499 302421", category_ids_text_search = "", website = "http://www.inveraray-castle.com", 
    status = "1", geocode_confidence = "20", postcode = "PA32 8XE", 
    category_ids = 347L, country = "gb", `_geocode_quality` = "4", 
    existence_ml = 0.791488110284778), .Names = c("existence_full", 
"geo_virtual", "latitude", "locality", "_records_touched", "address", 
"email", "longitude", "domain_aggregate", "name", "admin_region", 
"search_tags", "existence", "category_labels", "region", "review_count", 
"geocode_level", "tel", "placerank", "post_town", "placerank_ml", 
"fax", "category_ids_text_search", "website", "status", "geocode_confidence", 
"postcode", "category_ids", "country", "_geocode_quality", "existence_ml" 
)), uuid = "8278ab80-2cd1-4dbd-9685-0d0036b681eb"), .Names = c("payload", 
"uuid")), structure(list(payload = structure(list(existence_full = 1L, 
    geo_virtual = "[\"51.483872|-0.606820|100|rooftop|2\"]", 
    latitude = "51.483872", locality = "Windsor Castle", hours_display = "Mon-Sat 11:30 AM-11:00 PM; Sun 12:00 PM-11:00 PM", 
    `_records_touched` = "{\"crawl\":7,\"lssi\":0,\"polygon_centroid\":0,\"geocoder\":2,\"user_submission\":0,\"tdc\":0,\"gov\":0}", 
    address = "", longitude = "-0.606820", domain_aggregate = "", 
    name = "Windsor Castle", admin_region = "England", search_tags = c("The Windsor Castle", 
    "The Windsor Castle Pub", "The Windsor Castle Public House", 
    "Pub Food", "British"), existence = 1L, category_labels = structure(c("Landmarks", 
    "Buildings and Structures"), .Dim = 1:2), region = "Berkshire", 
    review_count = "", geocode_level = "rooftop", tel = "020 7766 7304", 
    placerank = 62L, post_town = "Windsor", placerank_ml = 28.1160845346327, 
    fax = "01753 832290", category_ids_text_search = "", website = "http://www.royalcollection.org.uk/visit/windsorcastle", 
    status = "1", hours = "{\"monday\":[[\"11:30\",\"23:00\"]],\"tuesday\":[[\"11:30\",\"23:00\"]],\"wednesday\":[[\"11:30\",\"23:00\"]],\"thursday\":[[\"11:30\",\"23:00\"]],\"friday\":[[\"11:30\",\"23:00\"]],\"saturday\":[[\"11:30\",\"23:00\"]],\"sunday\":[[\"12:00\",\"23:00\"]]}", 
    neighborhood = "Chalvey", geocode_confidence = "100", postcode = "SL4 1NJ", 
    category_ids = 108L, country = "gb", `_geocode_quality` = "2", 
    existence_ml = 0.885705196944165, email = "[email protected]"), .Names = c("existence_full", 
"geo_virtual", "latitude", "locality", "hours_display", "_records_touched", 
"address", "longitude", "domain_aggregate", "name", "admin_region", 
"search_tags", "existence", "category_labels", "region", "review_count", 
"geocode_level", "tel", "placerank", "post_town", "placerank_ml", 
"fax", "category_ids_text_search", "website", "status", "hours", 
"neighborhood", "geocode_confidence", "postcode", "category_ids", 
"country", "_geocode_quality", "existence_ml", "email")), uuid = "c5f7d8a9-0851-46ef-8da7-ad55e187d3a8"), .Names = c("payload", 
"uuid")), structure(list(payload = structure(list(existence_full = 1L, 
    category_ids_text_search = "", placerank_ml = 31.9857184762157, 
    longitude = "-2.191955", name = "Pitmedden Garden", domain_aggregate = "", 
    admin_region = "Scotland", languages = "English", region = "Aberdeenshire", 
    review_count = "2", geocode_level = "rooftop", tel = "01651 842352", 
    placerank = 57L, post_town = "Ellon", category_labels = structure(c("Landmarks", 
    "Gardens"), .Dim = 1:2), existence = 1L, fax = "0844 493 2102", 
    website = "http://www.nts.org.uk/Property/Pitmedden-Garden", 
    status = "1", geocode_confidence = "100", postcode = "AB41 7PD", 
    country = "gb", category_ids = 109L, `_geocode_quality` = "4", 
    existence_ml = 0.849871115334588, email = "[email protected]", 
    address = "", `_records_touched` = "{\"crawl\":6,\"lssi\":0,\"polygon_centroid\":0,\"geocoder\":0,\"user_submission\":0,\"tdc\":0,\"gov\":0}", 
    locality = "Pitmedden", latitude = "57.343233", geo_virtual = "[\"57.343233|-2.191955|100|rooftop|4\"]"), .Names = c("existence_full", 
"category_ids_text_search", "placerank_ml", "longitude", "name", 
"domain_aggregate", "admin_region", "languages", "region", "review_count", 
"geocode_level", "tel", "placerank", "post_town", "category_labels", 
"existence", "fax", "website", "status", "geocode_confidence", 
"postcode", "country", "category_ids", "_geocode_quality", "existence_ml", 
"email", "address", "_records_touched", "locality", "latitude", 
"geo_virtual")), uuid = "bb57a153-740f-42be-aa4d-ae12d4eb57d4"), .Names = c("payload", 






元のデータレイアウトについてもっと詳しい説明があればよかったのですが、ここではオブジェクトの上位レベルの構造から推測して回答します。この構造が `dat` という名前であると仮定します。

> lapply(dat, names) 
[1] "payload" "uuid" 

[1] "payload" "uuid" 

[1] "payload" "uuid" 

[1] "payload" "uuid" 


# Split every record of `dat` into its two top-level parts, one data frame
# each. data.frame() expands the nested `payload` list into columns, so a
# vector-valued field (e.g. `search_tags`) produces several rows per record.
payloads <- lapply(dat, function(x) data.frame(x$payload))
uuids <- lapply(dat, function(x) data.frame(x$uuid))

# Attach each record's uuid to its payload rows; the one-row uuid frame is
# recycled to the payload's row count by cbind(). This has to run after the
# two lists above exist. SIMPLIFY = FALSE guarantees a plain list of data
# frames: with the default, mapply() collapses the results into a matrix
# whenever every record happens to yield the same number of columns.
newdat <- mapply(cbind, payloads, uuids, SIMPLIFY = FALSE)

上のコードでは、それらをデータフレームのリストとして抽出しています。1行だけのデータフレームが、複数行のデータフレームに正しく複製されているかどうかを確認してください。仕様に含まれていない項目は NA になります。「uuid」は明らかに識別子なので、cbind 操作はその列の内容を「payload」と同じ行数になるように複製します。

> lapply(payloads, dim) 
[1] 2 32 

[1] 2 33 

[1] 5 35 

[1] 1 32 

> lapply(uuids, dim) 
[1] 1 1 

[1] 1 1 

[1] 1 1 

[1] 1 1 

> lapply(mapply(cbind, payloads, uuids), dim) 
[1] 2 33 

[1] 2 34 

[1] 5 36 

[1] 1 33 


lapply(newdat, names) 
[1] "existence_full"   "geo_virtual"    "latitude"     
[4] "locality"     "X_records_touched"  "address"     
[7] "email"     "existence_ml"    "domain_aggregate"   
[10] "name"      "search_tags"    "admin_region"    
[13] "existence"    "category_labels.1"  "category_labels.2"  
[16] "post_town"    "region"     "review_count"    
[19] "geocode_level"   "tel"      "placerank"    
[22] "longitude"    "placerank_ml"    "fax"      
[25] "category_ids_text_search" "website"     "status"     
[28] "geocode_confidence"  "postcode"     "category_ids"    
[31] "country"     "X_geocode_quality"  "x.uuid"     

[1] "existence_full"   "geo_virtual"    "latitude"     
[4] "locality"     "X_records_touched"  "address"     
[7] "email"     "longitude"    "domain_aggregate"   
[10] "name"      "admin_region"    "search_tags"    
[13] "existence"    "category_labels.1"  "category_labels.2"  
[16] "category_labels.3"  "region"     "review_count"    
[19] "geocode_level"   "tel"      "placerank"    
[22] "post_town"    "placerank_ml"    "fax"      
[25] "category_ids_text_search" "website"     "status"     
[28] "geocode_confidence"  "postcode"     "category_ids"    
[31] "country"     "X_geocode_quality"  "existence_ml"    
[34] "x.uuid"     

[1] "existence_full"   "geo_virtual"    "latitude"     
[4] "locality"     "hours_display"   "X_records_touched"  
[7] "address"     "longitude"    "domain_aggregate"   
[10] "name"      "admin_region"    "search_tags"    
[13] "existence"    "category_labels.1"  "category_labels.2"  
[16] "region"     "review_count"    "geocode_level"   
[19] "tel"      "placerank"    "post_town"    
[22] "placerank_ml"    "fax"      "category_ids_text_search" 
[25] "website"     "status"     "hours"     
[28] "neighborhood"    "geocode_confidence"  "postcode"     
[31] "category_ids"    "country"     "X_geocode_quality"  
[34] "existence_ml"    "email"     "x.uuid"     

[1] "existence_full"   "category_ids_text_search" "placerank_ml"    
[4] "longitude"    "name"      "domain_aggregate"   
[7] "admin_region"    "languages"    "region"     
[10] "review_count"    "geocode_level"   "tel"      
[13] "placerank"    "post_town"    "category_labels.1"  
[16] "category_labels.2"  "existence"    "fax"      
[19] "website"     "status"     "geocode_confidence"  
[22] "postcode"     "country"     "category_ids"    
[25] "X_geocode_quality"  "existence_ml"    "email"     
[28] "address"     "X_records_touched"  "locality"     
[31] "latitude"     "geo_virtual"    "x.uuid"  

Hadley の plyr パッケージにある rbind.fill 関数を使うと、これを効率的に行うことができます。

# Stack the per-record data frames into a single data frame; columns missing
# from a given record are filled with NA. rbind.fill() accepts the list of
# data frames directly, so no do.call() wrapper is needed.
newdat3 <- plyr::rbind.fill(newdat)


> newdat3[ , c("locality", "category_labels.3", "neighborhood")] 
     locality category_labels.3 neighborhood 
1  Stonehaven    <NA>   <NA> 
2  Stonehaven    <NA>   <NA> 
3  Inveraray  Restaurants   <NA> 
4  Inveraray  Restaurants   <NA> 
5 Windsor Castle    <NA>  Chalvey 
6 Windsor Castle    <NA>  Chalvey 
7 Windsor Castle    <NA>  Chalvey 
8 Windsor Castle    <NA>  Chalvey 
9 Windsor Castle    <NA>  Chalvey 
10  Pitmedden    <NA>   <NA> 