/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.reef.examples.data.loading;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.reef.annotations.audience.TaskSide;
import org.apache.reef.io.data.loading.api.DataSet;
import org.apache.reef.io.network.util.Pair;
import org.apache.reef.task.Task;

import javax.inject.Inject;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * The task that iterates over the data set to count the number of records.
 * Assumes TextInputFormat and that records represent lines.
 */
@TaskSide
public class LineCountingTask implements Task {

  private static final Logger LOG = Logger.getLogger(LineCountingTask.class.getName());

  /** The data set whose records are counted by {@link #call(byte[])}. */
  private final DataSet<LongWritable, Text> dataSet;

  /**
   * Creates the task for the given data set.
   *
   * @param dataSet the records to count; iterated once per invocation of {@link #call(byte[])}.
   */
  @Inject
  public LineCountingTask(final DataSet<LongWritable, Text> dataSet) {
    this.dataSet = dataSet;
  }

  /**
   * Iterates over the whole data set and counts its records.
   *
   * @param memento not read by this task.
   * @return the record count as a decimal string, encoded as UTF-8 bytes.
   * @throws Exception declared by the overridden method; this body throws nothing explicitly,
   *                   but whatever the data set's iterator raises propagates to the caller.
   */
  @Override
  public byte[] call(final byte[] memento) throws Exception {
    LOG.log(Level.FINER, "LineCounting task started");
    int numEx = 0;
    // Only the number of records matters here, so the key/value pair itself is never inspected.
    for (final Pair<LongWritable, Text> keyValue : dataSet) {
      ++numEx;
    }
    LOG.log(Level.FINER, "LineCounting task finished: read {0} lines", numEx);
    // Encode with an explicit charset so the returned bytes do not depend on the
    // default charset of the JVM that happens to run this task.
    return Integer.toString(numEx).getBytes(StandardCharsets.UTF_8);
  }
}