2017-07-07 6 views
0

XDocumentを使ってネストされたXMLを解析する方法についての質問です。以下の形式のXMLがあります。このXMLには2つの部分(job_type = "REQUESTED" と job_type = "RECOMMENDED")があり、REQUESTEDブロックの値だけを解析したいと考えています。

また、Applicantタグには2種類の申請者(type = "PB" と type = "CB")があります。結果は以下の形式のCSVとして出力したいです。

id , social_security_number (where type = "PB"), first_name(where type = "PB"), city(where type = "PB" and item_code="CURRENT"), state_code_id(where type = "PB" and item_code="CURRENT"), com(where item_code="PEMAIL" and type ="PB"), social_security_number (where type = "CB"), first_name(where type = "CB"), city(where type = "CB" and item_code="CURRENT"), state_code_id(where type = "CB" and item_code="CURRENT"), com(where item_code="PEMAIL" and type ="CB") 

例:結果:

2407132 ,999999999, Thomas, Portland, MI, [email protected], 123456789, Mary, BarHarBor, MI, [email protected] 

<?xml version="1.0" encoding="utf-8"?> 
<JobApplications xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="2407132" bundle_id="2407132" version="1.0"> 
<JobApplication job_type="REQUESTED" request_date="2014-08-02T12:26:00.0000000"> 
<JobApplicationStates> 
<JobApplicationState type="USEDCL" payment_call_flag="False"> 
<Applicants> 
<Applicant social_security_number="999999999" type="PB" date_of_birth="1972-10-01T00:00:00.0000000" first_name="Thomas" last_name="Edison"> 
<Addresses> 
<Address city="Portland" state_code_id="MI" country_code="USA" postal_code="12345" item_code="CURRENT" street_number="6297" street="LAKE ARBOR" /> 
<Address item_code="PREVIOUS" /> 
</Addresses> 
<Communications> 
<Communication item_code="PEMAIL" com="[email protected]" contact_type="CU"/> 
<Communication item_code="HOME" com="(123)-456-7890" contact_type="CU"/> 
<Communication item_code="OTHER" contact_type="CU"/> 
<Communication item_code="WORK" com="(100)-200-3000" contact_type="CU"/> 
</Communications> 
</Applicant> 
<Applicant social_security_number="123456789" type="CB" date_of_birth="1976-10-01T00:00:00.0000000" first_name="Mary" last_name="Edison"> 
<Addresses> 
<Address city="BarHarBor" state_code_id="MI" country_code="USA" postal_code="12345" item_code="CURRENT" street_number="6297" street="LAKE ARBOR" /> 
<Address item_code="PREVIOUS" /> 
</Addresses> 
<Communications> 
<Communication item_code="PEMAIL" com="[email protected]" contact_type="CU"/> 
<Communication item_code="HOME" com="(999)-456-7890" contact_type="CU"/> 
<Communication item_code="OTHER" contact_type="CU"/> 
<Communication item_code="WORK" com="(300)-200-3000" contact_type="CU"/> 
</Communications> 
</Applicant> 
</Applicants> 
</JobApplicationState> 
</JobApplicationStates> 
</JobApplication> 
<JobApplication job_type="RECOMMENDED" request_date="2014-08-02T12:26:00.0000000"> 
<JobApplicationStates> 
<JobApplicationState type="USEDCL" payment_call_flag="False"> 
<Applicants> 
<Applicant social_security_number="999999999" type="PB" date_of_birth="1972-10-01T00:00:00.0000000" first_name="Thomas" last_name="Edison"> 
<Addresses> 
<Address city="Portland" state_code_id="MI" country_code="USA" postal_code="12345" item_code="CURRENT" street_number="6297" street="LAKE ARBOR" /> 
<Address item_code="PREVIOUS" /> 
</Addresses> 
<Communications> 
<Communication item_code="PEMAIL" com="[email protected]" contact_type="CU"/> 
<Communication item_code="HOME" com="(123)-456-7890" contact_type="CU"/> 
<Communication item_code="OTHER" contact_type="CU"/> 
<Communication item_code="WORK" com="(100)-200-3000" contact_type="CU"/> 
</Communications> 
</Applicant> 
<Applicant social_security_number="123456789" type="CB" date_of_birth="1976-10-01T00:00:00.0000000" first_name="Mary" last_name="Edison"> 
<Addresses> 
<Address city="BarHarBor" state_code_id="MI" country_code="USA" postal_code="12345" item_code="CURRENT" street_number="6297" street="LAKE ARBOR" /> 
<Address item_code="PREVIOUS" /> 
</Addresses> 
<Communications> 
<Communication item_code="PEMAIL" com="[email protected]" contact_type="CU"/> 
<Communication item_code="HOME" com="(999)-456-7890" contact_type="CU"/> 
<Communication item_code="OTHER" contact_type="CU"/> 
<Communication item_code="WORK" com="(300)-200-3000" contact_type="CU"/> 
</Communications> 
</Applicant> 
</Applicants> 
</JobApplicationState> 
</JobApplicationStates> 
</JobApplication> 
</JobApplications> 

XDocumentを使って、必要な形式に解析するにはどうすればよいですか?解析対象のXMLは数百万件あります。

答えて

0

XMLファイルはフラット化してデータベースに入れたほうが読みやすくなることが多く、今回のケースでもそれが最良の方法だと思います。以下のコードを参照してください。解析が終わった後でDataTableの結果をフィルタリングできるので、特定の項目だけを解析するために解析メソッドを複雑にすることはお勧めしません。

using System; 
using System.Collections.Generic; 
using System.Linq; 
using System.Text; 
using System.Data; 
using System.Xml; 
using System.Xml.Linq; 

namespace ConsoleApplication1
{
    /// <summary>
    /// Flattens a JobApplications XML file into a <see cref="DataTable"/> holding
    /// one row per Applicant element, repeating the enclosing JobApplication and
    /// JobApplicationState values on every row.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.xml";

        /// <summary>
        /// Loads <see cref="FILENAME"/> and flattens it into an in-memory table.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            DataTable dt = CreateTable();

            XDocument doc = XDocument.Load(FILENAME);

            FillTable(dt, doc.Root);
        }

        /// <summary>
        /// Creates the empty result table; column order must match the value
        /// order used when rows are added in <see cref="FillTable"/>.
        /// </summary>
        /// <returns>A table with the twenty output columns and no rows.</returns>
        static DataTable CreateTable()
        {
            DataTable dt = new DataTable();

            // Bundle / job application level.
            dt.Columns.Add("ID", typeof(int));
            dt.Columns.Add("Job Type", typeof(string));
            dt.Columns.Add("Request Date", typeof(DateTime));
            dt.Columns.Add("Job State Type", typeof(string));
            dt.Columns.Add("Payment Call Flag", typeof(Boolean));

            // Applicant level.
            dt.Columns.Add("SSN", typeof(string));
            // NOTE(review): the spelling "Appliacant" is kept unchanged because
            // code outside this block may look the column up by this exact name.
            dt.Columns.Add("Appliacant Type", typeof(string));
            dt.Columns.Add("DOB", typeof(DateTime));
            dt.Columns.Add("First Name", typeof(string));
            dt.Columns.Add("Last Name", typeof(string));

            // Current address.
            dt.Columns.Add("City", typeof(string));
            dt.Columns.Add("State", typeof(string));
            dt.Columns.Add("Country", typeof(string));
            dt.Columns.Add("Postal Code", typeof(string));
            dt.Columns.Add("Street Number", typeof(string));
            dt.Columns.Add("Street", typeof(string));

            // Communications, one column per item_code.
            dt.Columns.Add("Email", typeof(string));
            dt.Columns.Add("Home Phone", typeof(string));
            dt.Columns.Add("Other", typeof(string));
            dt.Columns.Add("Work Phone", typeof(string));

            return dt;
        }

        /// <summary>
        /// Adds one row to <paramref name="dt"/> for every Applicant found under
        /// each JobApplication child of <paramref name="jobApplications"/>.
        /// Optional elements and attributes that are absent produce empty cells
        /// instead of an exception.
        /// </summary>
        /// <param name="dt">Table created by <see cref="CreateTable"/>.</param>
        /// <param name="jobApplications">The JobApplications root element; its "id" attribute is required.</param>
        static void FillTable(DataTable dt, XElement jobApplications)
        {
            int id = (int)jobApplications.Attribute("id");

            foreach (XElement jobApplication in jobApplications.Elements("JobApplication"))
            {
                string jobType = (string)jobApplication.Attribute("job_type");
                // Nullable casts return null for a missing attribute instead of throwing.
                DateTime? requestDate = (DateTime?)jobApplication.Attribute("request_date");

                // Look the state element up once; it may be absent.
                XElement state = jobApplication.Descendants("JobApplicationState").FirstOrDefault();
                string jobStateType = (string)AttributeOrNull(state, "type");
                bool? paymentCallFlag = (bool?)AttributeOrNull(state, "payment_call_flag");

                foreach (XElement applicant in jobApplication.Descendants("Applicant"))
                {
                    string socialSecurityNumber = (string)applicant.Attribute("social_security_number");
                    string applicantType = (string)applicant.Attribute("type");
                    DateTime? dateOfBirth = (DateTime?)applicant.Attribute("date_of_birth");
                    string firstName = (string)applicant.Attribute("first_name");
                    string lastName = (string)applicant.Attribute("last_name");

                    // Only the CURRENT address is exported; an applicant may not have one.
                    XElement address = applicant.Descendants("Address")
                        .FirstOrDefault(x => (string)x.Attribute("item_code") == "CURRENT");
                    string city = (string)AttributeOrNull(address, "city");
                    string state_code = (string)AttributeOrNull(address, "state_code_id");
                    string country = (string)AttributeOrNull(address, "country_code");
                    string postalCode = (string)AttributeOrNull(address, "postal_code");
                    string streetNumber = (string)AttributeOrNull(address, "street_number");
                    string street = (string)AttributeOrNull(address, "street");

                    XElement communications = applicant.Descendants("Communications").FirstOrDefault();
                    string email = CommunicationValue(communications, "PEMAIL");
                    string homePhone = CommunicationValue(communications, "HOME");
                    string other = CommunicationValue(communications, "OTHER");
                    string workPhone = CommunicationValue(communications, "WORK");

                    // A null entry leaves the corresponding cell at the column default.
                    dt.Rows.Add(new object[] {
                        id,
                        jobType, requestDate, jobStateType, paymentCallFlag,
                        socialSecurityNumber, applicantType, dateOfBirth, firstName, lastName,
                        city, state_code, country, postalCode, streetNumber, street,
                        email, homePhone, other, workPhone
                    });
                }
            }
        }

        /// <summary>
        /// Returns the named attribute of <paramref name="element"/>, or null when
        /// the element itself is null or the attribute does not exist.
        /// </summary>
        /// <param name="element">Element to read from; may be null.</param>
        /// <param name="name">Attribute name.</param>
        /// <returns>The attribute, or null.</returns>
        static XAttribute AttributeOrNull(XElement element, string name)
        {
            if (element == null)
            {
                return null;
            }

            return element.Attribute(name);
        }

        /// <summary>
        /// Returns the "com" value of the first child of <paramref name="communications"/>
        /// whose "item_code" equals <paramref name="itemCode"/>.
        /// </summary>
        /// <param name="communications">The Communications container; may be null.</param>
        /// <param name="itemCode">The item_code to match, e.g. "PEMAIL".</param>
        /// <returns>The "com" attribute value, or null when there is no container, no match, or no "com" attribute.</returns>
        static string CommunicationValue(XElement communications, string itemCode)
        {
            if (communications == null)
            {
                return null;
            }

            return communications.Elements()
                .Where(x => (string)x.Attribute("item_code") == itemCode)
                .Select(x => (string)x.Attribute("com"))
                .FirstOrDefault();
        }
    }
}
+0

私は欲しいものを得るためにそれを微調整しました。私は基本的にアイデアが好きだった! – HadoopAddict

0

オープンソースのライブラリ Cinchoo ETL

を使えば、数行のコードでXMLファイルをCSVファイルに変換できます。XMLは構造化された形式のファイルなので、CSVを生成するにはフラット化する必要があります。XPathとCinchoo ETLライブラリを使えば、すぐにCSVファイルを生成できます。

2407132,999999999,Thomas,Portland,MI,[email protected],123456789,Mary,BarHarBor,MI,[email protected] 

情報開示:私はこのライブラリの作者です。

以下のサンプルコードは、XMLを解析してCSVファイルに書き出します。

// Declares one output field per CSV column; every applicant field is taken
// from the job_type='REQUESTED' block only.
// NOTE(review): the exact evaluation rules of WithXPath/WithField belong to the
// Cinchoo ETL library and are not visible here - confirm against its documentation.
using (var xmlReader = new ChoXmlReader("sample.xml").WithXPath("JobApplications")
    // Bundle identifier.
    .WithField(
        "ID",
        xPath: "@id")
    // Applicant with type='PB'.
    .WithField(
        "PB_SSN",
        xPath: "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants/Applicant[@type='PB']/@social_security_number")
    .WithField(
        "PB_FIRST_NAME",
        xPath: "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants/Applicant[@type='PB']/@first_name")
    .WithField(
        "PB_CITY",
        xPath: "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants/Applicant[@type='PB']/Addresses/Address[@item_code='CURRENT']/@city")
    .WithField(
        "PB_STATE",
        xPath: "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants/Applicant[@type='PB']/Addresses/Address[@item_code='CURRENT']/@state_code_id")
    .WithField(
        "PB_PEMAIL",
        xPath: "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants/Applicant[@type='PB']/Communications/Communication[@item_code='PEMAIL']/@com")
    // Applicant with type='CB'.
    .WithField(
        "CB_SSN",
        xPath: "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants/Applicant[@type='CB']/@social_security_number")
    .WithField(
        "CB_FIRST_NAME",
        xPath: "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants/Applicant[@type='CB']/@first_name")
    .WithField(
        "CB_CITY",
        xPath: "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants/Applicant[@type='CB']/Addresses/Address[@item_code='CURRENT']/@city")
    .WithField(
        "CB_STATE",
        xPath: "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants/Applicant[@type='CB']/Addresses/Address[@item_code='CURRENT']/@state_code_id")
    .WithField(
        "CB_PEMAIL",
        xPath: "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants/Applicant[@type='CB']/Communications/Communication[@item_code='PEMAIL']/@com"))
{
    // Hand the configured reader to the CSV writer, which writes to sample.csv.
    using (var csvWriter = new ChoCSVWriter("sample.csv"))
    {
        csvWriter.Write(xmlReader);
    }
}

上記のコードは、XMLの解析結果をCSVファイルに変換する方法を示しています。

関連する問題