SyntaxHighlighter

Wednesday, May 29, 2013

Spring Batch file processing

Spring Batch file processing

Here's a simple little configuration for processing a file with Spring Batch.

First, the job setup:



 
  
   
    
   
  
  
   
    
   
  
 

 
  
  
  
 
 
 
  
  
 
 
 
 
  
  
 
 
 
  
 
 
 
  
  
   
  
  
 



Then, the batch resources:



 

 
  
 
 
  
  
 
 
 
  
   
   
  
  
  
  
  
   
  
 



Now, the entity we're going to load the records into:

package de.incompleteco.spring.batch.domain;

import java.io.Serializable;

/**
 * Serializable bean holding three independent string properties,
 * {@code col1}, {@code col2} and {@code col3}.
 *
 * <p>Every property starts out as {@code null} and is exposed through a
 * conventional getter/setter pair; no validation is applied to the values.
 */
public class SimpleEntity implements Serializable {

    private static final long serialVersionUID = 1L;

    private String col1;
    private String col2;
    private String col3;

    /**
     * @return the value last stored in {@code col1}, or {@code null} if unset
     */
    public String getCol1() {
        return this.col1;
    }

    /**
     * @param col1 the value to store in {@code col1}
     */
    public void setCol1(final String col1) {
        this.col1 = col1;
    }

    /**
     * @return the value last stored in {@code col2}, or {@code null} if unset
     */
    public String getCol2() {
        return this.col2;
    }

    /**
     * @param col2 the value to store in {@code col2}
     */
    public void setCol2(final String col2) {
        this.col2 = col2;
    }

    /**
     * @return the value last stored in {@code col3}, or {@code null} if unset
     */
    public String getCol3() {
        return this.col3;
    }

    /**
     * @param col3 the value to store in {@code col3}
     */
    public void setCol3(final String col3) {
        this.col3 = col3;
    }
}


And here's an integration test that exercises it all:

package de.incompleteco.spring.batch;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.FileOutputStream;

import javax.sql.DataSource;

import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.explore.JobExplorer;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

/**
 * Integration test that generates a CSV file in the temp directory, launches
 * the autowired {@link Job} with that file as its {@code fileName} parameter,
 * and then checks both the job's exit status and the number of rows found in
 * the {@code simple_entity} table.
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration({"classpath:/META-INF/spring/*-context.xml"})
@ActiveProfiles("junit")
public class FileJobIntegrationTest {

    @Autowired
    private Job job;

    @Autowired
    private JobLauncher jobLauncher;

    @Autowired
    private JobExplorer jobExplorer;

    @Autowired
    private DataSource dataSource;

    /** Number of data records written to the generated file. */
    private int recordCount = 1000000;

    /** Absolute path of the generated CSV file. */
    private String fileName = System.getProperty("java.io.tmpdir") + File.separator + "test.csv";

    /**
     * Removes a test file left behind by a previous run, if there is one.
     */
    @Before
    public void before() throws Exception {
        if (new File(fileName).exists()) {
            new File(fileName).delete();
        }
    }

    /**
     * Writes the test file, runs the job against it and verifies the outcome.
     *
     * @throws Exception if the file cannot be written, the job cannot be
     *                   launched, or the polling sleep is interrupted
     */
    @Test
    public void test() throws Exception {
        createTestFile();
        // report the size of the generated file (whole megabytes)
        long length = new File(fileName).length();
        System.out.println("file size: " + ((length / 1024) / 1024));
        // execute the job
        JobParameters jobParameters = new JobParametersBuilder().addString("fileName", fileName).toJobParameters();
        JobExecution execution = jobLauncher.run(job, jobParameters);
        // poll until the job repository no longer reports the execution as running
        while (jobExplorer.getJobExecution(execution.getId()).isRunning()) {
            Thread.sleep(1000);
        }
        // reload the execution to pick up its final state
        execution = jobExplorer.getJobExecution(execution.getId());
        assertEquals(ExitStatus.COMPLETED.getExitCode(), execution.getExitStatus().getExitCode());
        // every generated record should now be a row in the database
        int count = new JdbcTemplate(dataSource).queryForObject("select count(*) from simple_entity", Integer.class);
        // assertEquals reports expected vs. actual on failure, unlike assertTrue(a == b)
        assertEquals(recordCount, count);
    }

    /**
     * Writes a header line followed by exactly {@code recordCount} records of
     * the form {@code i,i+1,i+2} to {@code fileName}.
     *
     * <p>The header is terminated with a newline so that the first record
     * starts on its own line, and the loop is exclusive of {@code recordCount}
     * so that the number of records matches the count asserted by the test.
     * NOTE(review): this presumes the job's reader skips the single header
     * line - confirm against the job configuration.
     */
    private void createTestFile() throws Exception {
        FileOutputStream fos = new FileOutputStream(fileName);
        try {
            fos.write("col1,col2,col3\n".getBytes());
            for (int i = 0; i < recordCount; i++) {
                fos.write((i + "," + (i + 1) + "," + (i + 2) + "\n").getBytes());
            }
        } finally {
            // always release the file handle, even if a write fails
            fos.close();
        }
    }

}


Now, testing locally, the file generated by the test is ~19 MB; heap usage goes up, but the heap is not exhausted at 1,000,000 records. It's a neat little example of processing a big file quickly and cleanly into a database.

No comments:

Post a Comment