/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.reef.examples.data.loading;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.reef.annotations.audience.TaskSide;
import org.apache.reef.io.data.loading.api.DataSet;
import org.apache.reef.io.network.util.Pair;
import org.apache.reef.task.Task;

import javax.inject.Inject;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * A task that walks through its data set and counts the records it contains.
 * Assumes TextInputFormat and that records represent lines.
 */
@TaskSide
public class LineCountingTask implements Task {

  private static final Logger LOG = Logger.getLogger(LineCountingTask.class.getName());

  /** The records this task iterates over, keyed by {@link LongWritable} with {@link Text} values. */
  private final DataSet<LongWritable, Text> dataSet;

  /**
   * Creates the task around the data set it will count.
   *
   * @param dataSet the data set whose records are counted by {@link #call(byte[])}.
   */
  @Inject
  public LineCountingTask(final DataSet<LongWritable, Text> dataSet) {
    this.dataSet = dataSet;
  }

  /**
   * Counts the records in the data set and reports the total.
   *
   * @param memento not read by this task.
   * @return the record count rendered as a decimal string and encoded as UTF-8 bytes.
   * @throws Exception declared by the {@link Task} contract.
   */
  @Override
  public byte[] call(final byte[] memento) throws Exception {
    LOG.log(Level.FINER, "LineCounting task started");

    final int lineCount = countRecords();
    LOG.log(Level.FINER, "LineCounting task finished: read {0} lines", lineCount);

    final String countAsText = String.valueOf(lineCount);
    return countAsText.getBytes(StandardCharsets.UTF_8);
  }

  /**
   * Iterates over the whole data set once and tallies its records.
   *
   * @return the number of records the data set's iterator produced.
   */
  private int countRecords() {
    int seen = 0;
    // The record contents are irrelevant here; each one only bumps the tally.
    for (final Pair<LongWritable, Text> ignoredRecord : dataSet) {
      seen++;
    }
    return seen;
  }
}