This project has retired. For details please refer to its Attic page.
Source code
001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.reef.examples.data.loading;
020
021import org.apache.hadoop.io.LongWritable;
022import org.apache.hadoop.io.Text;
023import org.apache.reef.annotations.audience.TaskSide;
024import org.apache.reef.io.data.loading.api.DataSet;
025import org.apache.reef.io.network.util.Pair;
026import org.apache.reef.task.Task;
027
028import javax.inject.Inject;
029import java.nio.charset.StandardCharsets;
030import java.util.logging.Level;
031import java.util.logging.Logger;
032
033/**
034 * The task that iterates over the data set to count the number of records.
035 * Assumes TextInputFormat and that records represent lines.
036 */
037@TaskSide
038public class LineCountingTask implements Task {
039
040  private static final Logger LOG = Logger.getLogger(LineCountingTask.class.getName());
041
042  private final DataSet<LongWritable, Text> dataSet;
043
044  @Inject
045  public LineCountingTask(final DataSet<LongWritable, Text> dataSet) {
046    this.dataSet = dataSet;
047  }
048
049  @Override
050  public byte[] call(final byte[] memento) throws Exception {
051    LOG.log(Level.FINER, "LineCounting task started");
052    int numEx = 0;
053    for (final Pair<LongWritable, Text> keyValue : dataSet) {
054      // LOG.log(Level.FINEST, "Read line: {0}", keyValue);
055      ++numEx;
056    }
057    LOG.log(Level.FINER, "LineCounting task finished: read {0} lines", numEx);
058    return Integer.toString(numEx).getBytes(StandardCharsets.UTF_8);
059  }
060}