Month: July 2016

Spring Batch Easy Example – from csv to csv file

Batch processing is the execution of a series of programs (“jobs”) on a computer without manual intervention.

Spring Batch provides mechanisms for processing large amounts of data, such as transaction management, job processing, resource management, logging, tracing, data conversion, interfaces, etc.
These functionalities are available out of the box and can be reused by applications containing the Spring Batch framework.

In this tutorial, we will show you how to configure a Spring Batch job that reads a CSV file and writes it to another CSV file, using an ItemProcessor to filter out records before they are written. It's a very easy program for beginners.

Tools and libraries used

  1. Maven 3
  2. Eclipse Luna
  3. JDK 1.7
  4. Spring Core 3.2.2.RELEASE
  5. Spring Batch 2.2.0.RELEASE
  6. Spring OXM 3.2.2.RELEASE

1. Create a Maven project. I named my project SpringBatchProject.

2. Project Dependencies –

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <!-- Maven build descriptor for the SpringBatchProject tutorial project. -->
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.solution.springbatch</groupId>
  <artifactId>SpringBatchProject</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <url>http://maven.apache.org</url>
  
  <!-- Versions referenced by the dependency and compiler-plugin declarations below. -->
  <properties>
        <jdk.version>1.7</jdk.version>
        <spring.version>3.2.2.RELEASE</spring.version>
        <spring.batch.version>2.2.0.RELEASE</spring.batch.version>
         <quartz.version>2.2.1</quartz.version>
    </properties>
    
    <dependencies>

        <!-- Spring Core -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-core</artifactId>
            <version>${spring.version}</version>
        </dependency>

        <!-- Spring OXM: conversion between objects and XML -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-oxm</artifactId>
            <version>${spring.version}</version>
        </dependency>
        <!-- Spring Batch dependencies (core and infrastructure share spring.batch.version) -->
        <dependency>
            <groupId>org.springframework.batch</groupId>
            <artifactId>spring-batch-core</artifactId>
            <version>${spring.batch.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.batch</groupId>
            <artifactId>spring-batch-infrastructure</artifactId>
            <version>${spring.batch.version}</version>
        </dependency>

        <!-- Spring Batch unit test support.
             NOTE(review): declared without <scope>test</scope>, unlike junit below;
             confirm whether it is really needed outside of tests. -->
        <dependency>
            <groupId>org.springframework.batch</groupId>
            <artifactId>spring-batch-test</artifactId>
            <version>${spring.batch.version}</version>
        </dependency>
     <!-- Quartz scheduler library.
          NOTE(review): the code shown in this tutorial schedules the job through the
          Spring task namespace and does not reference Quartz; confirm it is needed. -->
     <dependency>
            <groupId>org.quartz-scheduler</groupId>
            <artifactId>quartz</artifactId>
            <version>${quartz.version}</version>
        </dependency>
        <!-- JUnit, available to test code only -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>

    </dependencies>
    <build>
        <!-- Base name of the built artifact -->
        <finalName>spring-batch</finalName>
        <plugins>
            <!-- Eclipse plugin: configured to download sources but not Javadocs -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-eclipse-plugin</artifactId>
                <version>2.9</version>
                <configuration>
                    <downloadSources>true</downloadSources>
                    <downloadJavadocs>false</downloadJavadocs>
                </configuration>
            </plugin>
            <!-- Compiler plugin: source and target levels both follow jdk.version -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
                <configuration>
                    <source>${jdk.version}</source>
                    <target>${jdk.version}</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

3. Project Structure –

project_structure

4. CSV file resources/files/input.csv

1001,iryna,31,31/08/1982,200000
1003,john,29,21/08/1984,1000000
1004,brett,29,21/03/1984,80000.89
1002,jane,30,21/04/1992,500000
1005,anee,27,14/06/1992,500000

5. Read the CSV file – reader configuration in resources/jobs/job-report.xml

<!-- read csv file-->

<!-- Reader bean: reads classpath:files/input.csv and turns each line into an item via the line mapper below. -->
<bean id="cvsFileItemReader" class="org.springframework.batch.item.file.FlatFileItemReader">
        <property name="resource" value="classpath:files/input.csv" />
             <property name="lineMapper">
            <bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
              <!-- Step 1: split each line into the named fields, in column order.
                   NOTE(review): the date column is named csvDob; verify the Report bean exposes
                   a matching writable property (dates in input.csv are written as dd/MM/yyyy). -->
              <property name="lineTokenizer">
                    <bean
                  class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
                    <property name="names" value="refId, name, age, csvDob, income" />
                </bean>
              </property>
              <property name="fieldSetMapper">   


<!-- Step 2: map the named fields onto a new instance of the bean called "report" -->

             <bean
                class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
                <property name="prototypeBeanName" value="report" />
              </bean>      
              </property>

              </bean>
          </property>
    </bean>

6. The CSV file is mapped to the POJO Report.java

package com.solution.model;

import java.math.BigDecimal;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * One record of the report CSV: reference id, name, age, date of birth and income.
 *
 * <p>The date of birth is stored as a {@link Date} and is additionally exposed as the
 * text property {@code csvDob}, formatted as {@code dd/MM/yyyy}. The property is readable
 * and writable so that a CSV column named {@code csvDob} can be both read into and
 * written from this bean using the same date format.
 */
public class Report {

    /** Date pattern used for the {@code csvDob} text representation. */
    private static final String CSV_DOB_PATTERN = "dd/MM/yyyy";

    private int refId;
    private String name;
    private int age;
    private Date dob;
    private BigDecimal income;

    public int getRefId() {
        return refId;
    }

    public void setRefId(int refId) {
        this.refId = refId;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public Date getDob() {
        return dob;
    }

    public void setDob(Date dob) {
        this.dob = dob;
    }

    public BigDecimal getIncome() {
        return income;
    }

    public void setIncome(BigDecimal income) {
        this.income = income;
    }

    /**
     * Returns the date of birth formatted as {@code dd/MM/yyyy}.
     *
     * @return the formatted date, or an empty string when no date of birth is set
     */
    public String getCsvDob() {
        Date current = getDob();
        if (current == null) {
            // Previously this threw a NullPointerException from SimpleDateFormat.format.
            return "";
        }
        return newCsvDobFormat().format(current);
    }

    /**
     * Sets the date of birth from its {@code dd/MM/yyyy} text form.
     *
     * @param csvDob the date text; {@code null} or blank clears the date of birth
     * @throws IllegalArgumentException if the text is not a valid {@code dd/MM/yyyy} date
     */
    public void setCsvDob(String csvDob) {
        if (csvDob == null || csvDob.trim().isEmpty()) {
            setDob(null);
            return;
        }
        try {
            setDob(newCsvDobFormat().parse(csvDob.trim()));
        } catch (java.text.ParseException e) {
            throw new IllegalArgumentException(
                    "Invalid date of birth '" + csvDob + "', expected " + CSV_DOB_PATTERN, e);
        }
    }

    /**
     * Creates a formatter for {@link #CSV_DOB_PATTERN}. A new instance is created per call
     * because SimpleDateFormat is not safe to share between threads; it is non-lenient so
     * that out-of-range values such as month 21 are rejected instead of rolled over.
     */
    private static SimpleDateFormat newCsvDobFormat() {
        SimpleDateFormat format = new SimpleDateFormat(CSV_DOB_PATTERN);
        format.setLenient(false);
        return format;
    }
}

7. Spring batch Core Settings

Define jobRepository and jobLauncher

resources/config/context.xml
<beans xmlns="http://www.springframework.org/schema/beans"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="
	http://www.springframework.org/schema/beans 
	http://www.springframework.org/schema/beans/spring-beans-3.2.xsd">

    <!-- Core batch infrastructure: job repository, transaction manager and job launcher. -->

    <!-- Job repository that stores job metadata in memory; uses the transaction manager below -->
    <bean id="jobRepository"
	class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean">
	<property name="transactionManager" ref="transactionManager" />
    </bean>
 	
    <!-- Transaction manager referenced by the job repository -->
    <bean id="transactionManager"
	class="org.springframework.batch.support.transaction.ResourcelessTransactionManager" />
	
 
    <!-- Launcher used to start jobs; it records executions in the job repository above -->
    <bean id="jobLauncher"
	class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
	<property name="jobRepository" ref="jobRepository" />
    </bean>

</beans>

8. Spring batch Jobs

A Spring Batch job that reads the input.csv file, maps each line to a Report object, and writes it to a CSV file.

<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:batch="http://www.springframework.org/schema/batch" xmlns:task="http://www.springframework.org/schema/task"
    xmlns:context="http://www.springframework.org/schema/context"
    xmlns:util="http://www.springframework.org/schema/util" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.springframework.org/schema/batch
        http://www.springframework.org/schema/batch/spring-batch-2.2.xsd
        http://www.springframework.org/schema/beans 
        http://www.springframework.org/schema/beans/spring-beans-3.2.xsd
        http://www.springframework.org/schema/util 
        http://www.springframework.org/schema/util/spring-util-3.2.xsd
        http://www.springframework.org/schema/task
        http://www.springframework.org/schema/task/spring-task-3.2.xsd
        http://www.springframework.org/schema/context
        http://www.springframework.org/schema/context/spring-context.xsd">
    
    <!-- Job definition: read input.csv, filter the records, write csv/report.csv, and schedule the run. -->

    <!-- Scan the scheduler package for annotated components -->
    <context:component-scan base-package="com.solution.scheduler" />
    
    <!-- Prototype-scoped Report bean; the reader's field set mapper refers to it by name -->
    <bean id="report" class="com.solution.model.Report" scope="prototype" />
    <!-- Single-step job: reader, then processor, then writer, committing after every item -->
    <batch:job id="reportJob" restartable="true">
        <batch:step id="step1">
            <batch:tasklet>
                <batch:chunk reader="cvsFileItemReader" writer="cvsFileItemWriter" processor="filterReportProcessor"
                    commit-interval="1">
                </batch:chunk>
            </batch:tasklet>
        </batch:step>
    </batch:job>

    <!-- Processor applied to each item between reading and writing -->
    <bean id="filterReportProcessor" class="com.solution.processor.FilterReportProcessor" />

    <!-- Reader: reads classpath:files/input.csv line by line -->
    <bean id="cvsFileItemReader" class="org.springframework.batch.item.file.FlatFileItemReader">
        <property name="resource" value="classpath:files/input.csv" />
             <property name="lineMapper">
            <bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
              <!-- Split each line into the named fields, in column order.
                   NOTE(review): the date column is named csvDob; verify the Report bean exposes
                   a matching writable property (dates in input.csv are written as dd/MM/yyyy). -->
              <property name="lineTokenizer">
                    <bean
                  class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
                    <property name="names" value="refId, name, age, csvDob, income" />
                </bean>
              </property>
              <property name="fieldSetMapper">   


                <!-- Map the named fields onto a new instance of the "report" bean -->
                <bean
                class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
                <property name="prototypeBeanName" value="report" />
              </bean>      
              </property>

              </bean>
          </property>
    </bean>


    <!-- Writer: writes each item as one comma-delimited line -->
    <bean id="cvsFileItemWriter" class="org.springframework.batch.item.file.FlatFileItemWriter">

        <!-- Output file csv/report.csv; shouldDeleteIfExists is enabled for an already existing file -->
        <property name="resource" value="file:csv/report.csv" />
        <property name="shouldDeleteIfExists" value="true" />

        <property name="lineAggregator">
            <bean
                class="org.springframework.batch.item.file.transform.DelimitedLineAggregator">
                <property name="delimiter" value="," />
                <!-- Bean properties written to each line, in this order -->
                <property name="fieldExtractor">
                    <bean
                        class="org.springframework.batch.item.file.transform.BeanWrapperFieldExtractor">
                        <property name="names" value="refId, name, age, csvDob, income" />
                    </bean>
                </property>
            </bean>
        </property>

    </bean>
    <!-- Bean whose run() method is invoked by the schedule below.
         NOTE(review): RunScheduler is also annotated with @Component and its package is
         component-scanned above; confirm this explicit definition is intended. -->
    <bean id="runScheduler" class="com.solution.scheduler.RunScheduler" />
 <!-- Run every 5 seconds (six-field cron expression, first field is seconds) -->
  <task:scheduled-tasks>
  
    <task:scheduled ref="runScheduler" method="run" cron="*/5 * * * * *" />
   </task:scheduled-tasks>
</beans>

9. Spring Batch – ItemProcessor

In Spring Batch, the wired processor is fired before anything is written to a resource, so this is the best place to handle any conversion, filtering and business logic. In this example, a Report object is ignored (not written to the CSV file) if its age is greater than 30.

package com.solution.processor;

import org.springframework.batch.item.ItemProcessor;

import com.solution.model.Report;


//run before writing
public class FilterReportProcessor implements ItemProcessor<Report, Report> {

    @Override
    public Report process(Report item) throws Exception {

        //filter object which age > 30
        if(item.getAge()>30){
            return null; // null = ignore this object
        }
        return item;
    }

}

10. I have scheduled this job with a cron expression so that it runs every 5 seconds.

RunScheduler.java

package com.solution.scheduler;

import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

@Component
public class RunScheduler {

  @Autowired
  private JobLauncher jobLauncher;

  @Autowired
  private Job job;

  public void run() {

      try {
          JobParameters jobParameters = 
                  new JobParametersBuilder()
                  .addLong("time",System.currentTimeMillis()).toJobParameters();
            JobExecution execution = jobLauncher.run(job, jobParameters);
            System.out.println("Exit Status : " + execution.getStatus());

        } catch (Exception e) {
            e.printStackTrace();
        }

  }
}

11. Run the Main class now

package com.solution.scheduler;

import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

public class App {
    public static void main(String[] args) {

        String[] springConfig  = 
            {    
                "config/context.xml",
                "jobs/job-report.xml" 
            };
        
        ApplicationContext context = 
                new ClassPathXmlApplicationContext(springConfig);
        
    

    }
}

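12. Output CSV file, i.e. report.csv. Note that the dob values below do not match input.csv (for example, 21/08/1984 comes out as 08/09/1985), so the date column is not being read with the same dd/MM/yyyy format that is used to write it.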

1003,john,29,08/09/1985,1000000
1004,brett,29,03/09/1985,80000.89
1002,jane,30,04/09/1993,500000
1005,anee,27,06/02/1993,500000

 

 

 

Advertisement