コレクションからJavaのCopyManagerを使用したpostgresql DBへの高速挿入

グラフデータのシミュレーション/マイニングでは、高性能なPostgreSQL 9.5データベースに数千のデータを一度に書き込む必要があります。現在、準備されたステートメントと「行単位のINSERT」を使用しています。それはうまく動作しますが、遅すぎます。私はCopyManagerを介してPostgresql DBテーブルにCopy文を使ってコレクション（配列、ベクトル、またはArrayList）からデータを書き込む方法について何も見つけることができませんでした。私は簡単な例を得ることに感謝します。データ型を扱うには？ CopyManagerはデータ型を要求しません。コレクションからJavaのCopyManagerを使用したpostgresql DBへの高速挿入

私の現在のコード：

package io; 

import db.PostgresConnector; 
import entitiesP.Edge; 
import entitiesP.Graph; 
import entitiesP.Node; 
import java.sql.Connection; 
import java.sql.PreparedStatement; 
import java.sql.SQLException; 
import java.sql.Timestamp; 



public class DbLogger { 

public DbLogger(Graph g, int simNr) throws Exception { 

    Connection conn = new PostgresConnector().getConnection(); 

    //1. Graph Data File: 
     //------------------------------------------------- 

      //Filename und Header: 
      long timestamp = getTimeStamp(); 

      //Daten: 
      logGraphData(conn, simNr, 
         g.getNodes().size(), 
         g.getEdges().size(), 
         g.getRadiusAndDiameter()[0], 
         g.getRadiusAndDiameter()[1], 
         g.getRadiusAndDiameter()[2], 
         Node.maxDegree, 
         //new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(g.getSimStartTime()), 
         g.getSimStartTime(), 
         timestamp); 



     //2. Node Data File: 
     //------------------------------------------------- 

       //Daten einsammeln: 
       for (Node n: g.getNodes()){ 

        logNodeData(conn, simNr , 
            n.getId(), 
            n.getDegree(), 
            n.getClusteringCoefficient(), 
            timestamp); 

       } 


      //3. RelationData File: 
      //------------------------------------------------- 


       int[][] distance = g.getDistanceMatrix().clone(); 
       //AdMatrix dist = new AdMatrix(distance); 
       //dist.printAdMatrix(); 
       Object[] edges = g.getEdges().toArray(); 

       //adjacency nodes 
       int numNodes = g.getNodes().size(); 
       int numEdges = g.getEdges().size(); 

       //Edges: 
       for (int i=0; i < edges.length; i++){ 
        Object[] nodes = ((Edge) edges[i]).getNodes().toArray(); //pair of nodes of an edge 
        //pairs A--B 
        logRelationData(conn, simNr , 

           ((Node) nodes[0]).getId() , 
           ((Node) nodes[1]).getId() , 
           distance[((Node) nodes[0]).getId() -1][((Node) nodes[1]).getId() -1] , 
           ((Edge) edges[i]).getTieStrength() , 
           ((Edge) edges[i]).getNeighborhoodOverlap(((Node) nodes[0]),((Node) nodes[1])) , 
           timestamp 
         ); 
        //pairs B--A 
        logRelationData(conn, simNr , 

           ((Node) nodes[1]).getId() , 
           ((Node) nodes[0]).getId() , 
           distance[((Node) nodes[0]).getId() -1][((Node) nodes[1]).getId() -1] , 
           ((Edge) edges[i]).getTieStrength() , 
           ((Edge) edges[i]).getNeighborhoodOverlap(((Node) nodes[0]),((Node) nodes[1])) , 
           timestamp 
        ); 

        //end edges---------------------------- 

       } 


       //System.out.println("NumNodes: " + numNodes); 

       //non-adjacent nodes 
       for (int i=0; i < numNodes; i++){ 
        for (int j=0; j < numNodes; j++){ 
         if (distance[i][j] != 1 && i != j){ 
          logRelationData(conn, simNr, 
          (i+1), 
          (j+1), 
          (distance[i][j]==0? 999 : distance[i][j]) , //0 auf infinity setzen (999) 
          -1, 
          -1, 
          timestamp 
         ); 
         } 
        } 
       } 

       conn.close(); 
     } 



private static void logGraphData(Connection con, int sim_no, int numberOfNodes, int numberOfEdges, int radius, int diameter, int effDiameter, int maxDegree, long startTime, long timestamp) throws SQLException{ 

    PreparedStatement preStmt = con.prepareStatement("INSERT INTO public.GRAPH_DATA (" + "SIM_NO,NUMBER_OF_NODES,NUMBER_OF_EDGES,RADIUS,DIAMETER,EFF_DIAMETER,MAX_DEGREE,START_TIME,TIME_STAMP) VALUES (?,?,?,?,?,?,?,?,?)"); 
    preStmt.setInt(1, sim_no); 
    preStmt.setInt(2, numberOfNodes); 
    preStmt.setInt(3, numberOfEdges); 
    preStmt.setInt(4, radius); 
    preStmt.setInt(5, diameter); 
    preStmt.setInt(6, effDiameter); 
    preStmt.setInt(7, maxDegree); 
    preStmt.setTimestamp(8, new Timestamp(startTime)); 
    preStmt.setTimestamp(9, new Timestamp(timestamp)); 
    preStmt.executeUpdate(); 
    preStmt.close(); 

    //con.close(); 

    } 



private static void logNodeData(Connection con, int sim_no, int nodeId, int degree, double clusteringCoeff, long timeStamp) throws SQLException{ 

    PreparedStatement preStmt = con.prepareStatement("INSERT INTO public.NODE_DATA (" + "SIM_NO,NODE_ID,DEGREE,CLUSTERING_COEFFICIENT,TIME_STAMP) VALUES (?,?,?,?,?)"); 
    preStmt.setInt(1, sim_no); 
    preStmt.setInt(2, nodeId); 
    preStmt.setInt(3, degree); 
    preStmt.setDouble(4, clusteringCoeff); 
    preStmt.setTimestamp(5, new Timestamp(timeStamp)); 
    preStmt.executeUpdate(); 
    preStmt.close(); 
    //con.close(); 

    } 


private static void logRelationData(Connection con, int sim_no, int nodeId1, int nodeId2, int distance, int tieStrength, double neighborhoodOverlap, long timeStamp) throws SQLException{ 

    PreparedStatement preStmt = con.prepareStatement("INSERT INTO public.RELATION_DATA (" + "SIM_NO,NODE_ID1,NODE_ID2,DISTANCE,TIE_STRENGTH,NEIGHBORHOOD_OVERLAP,TIME_STAMP) VALUES (?,?,?,?,?,?,?)"); 
    preStmt.setInt(1, sim_no); 
    preStmt.setInt(2, nodeId1); 
    preStmt.setInt(3, nodeId2); 
    preStmt.setInt(4, distance); 
    preStmt.setInt(5, tieStrength); 
    preStmt.setDouble(6, neighborhoodOverlap); 
    preStmt.setTimestamp(7, new Timestamp(timeStamp)); 
    preStmt.executeUpdate(); 
    preStmt.close(); 
    //con.close(); 

    } 

public static long getTimeStamp(){ 
    long t = System.currentTimeMillis(); 
    //SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 
    //String timestamp = sdf.format(t); 
    return t; //timestamp; 
} 

}

出典

2016-06-30 Sven Temmler

各時間が遅いです。また、インサートをバッチしてデータを高速化する必要があります。 http://stackoverflow.com/questions/3784197/efficient-way-to-do-batch-inserts-with-jdbc – Konrad

ありがとうKonrad - 私はそれを試してみるつもりです。その間、私はpostgresql用の特別な一括挿入機能としてcopyManagerを扱う方法を見つけました。 –

私は私の問題の解決策を見つけた：それは非常に高速に動作します:)文を準備

package io; 


import db.PostgresConnector; 
import java.io.ByteArrayInputStream; 
import java.io.IOException; 
import java.sql.Connection; 
import java.sql.SQLException; 
import org.postgresql.copy.CopyManager; 
import org.postgresql.core.BaseConnection; 


public class DbLogger3 { 

public static void main (String args[]) throws Exception { 

     try { 
       Connection conn = new PostgresConnector().getConnection(); 

       CopyManager copyManager = new CopyManager((BaseConnection) conn); 

       String str = new String(); 

       //add 5000 data sets: (important: \r\n after each record to set a line feed (postgres interpretes that as the end of each dataset) 
       for (int i=0; i<5000; i++){ 
        str += "2;100;99;6.00;12.00;11.00;13;28.06.2016 00:08;28.06.2016 00:08"+"\r\n"; 
       } 

       //transform the String into bytes: 
       byte[] bytes = str.getBytes(); 

       //create ByteArrayInputStream object 
       ByteArrayInputStream input = new ByteArrayInputStream(bytes); 

       //insert into the database table (in my case: public.graph_data) 
       copyManager.copyIn("COPY public.graph_data FROM STDIN WITH DELIMITER ';'", input); 


     } catch (SQLException | IOException e) { 
       throw new Exception(e); 
     } 


    } 
}

出典

2016-07-04 22:44:15

コレクションからJavaのCopyManagerを使用したpostgresql DBへの高速挿入

答えて

関連する問題