This project has retired. For details please refer to its Attic page.
Source code
001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.reef.examples.data.loading;
020
021import org.apache.hadoop.io.LongWritable;
022import org.apache.hadoop.io.Text;
023import org.apache.reef.annotations.audience.TaskSide;
024import org.apache.reef.io.data.loading.api.DataSet;
025import org.apache.reef.io.network.util.Pair;
026import org.apache.reef.task.Task;
027
028import javax.inject.Inject;
029import java.util.logging.Level;
030import java.util.logging.Logger;
031
032/**
033 * The task that iterates over the data set to count the number of records.
034 * Assumes TextInputFormat and that records represent lines.
035 */
036@TaskSide
037public class LineCountingTask implements Task {
038
039  private static final Logger LOG = Logger.getLogger(LineCountingTask.class.getName());
040
041  private final DataSet<LongWritable, Text> dataSet;
042
043  @Inject
044  public LineCountingTask(final DataSet<LongWritable, Text> dataSet) {
045    this.dataSet = dataSet;
046  }
047
048  @Override
049  public byte[] call(final byte[] memento) throws Exception {
050    LOG.log(Level.FINER, "LineCounting task started");
051    int numEx = 0;
052    for (final Pair<LongWritable, Text> keyValue : dataSet) {
053      // LOG.log(Level.FINEST, "Read line: {0}", keyValue);
054      ++numEx;
055    }
056    LOG.log(Level.FINER, "LineCounting task finished: read {0} lines", numEx);
057    return Integer.toString(numEx).getBytes();
058  }
059}