diff --git a/interview-mr/pom.xml b/interview-mr/pom.xml
index e04d5f2..928749b 100644
--- a/interview-mr/pom.xml
+++ b/interview-mr/pom.xml
@@ -39,7 +39,7 @@
junit
junit
- 3.8.1
+ 4.11
test
@@ -48,10 +48,5 @@
2.6.0
provided
-
- com.google.guava
- guava
- 16.0
-
diff --git a/interview-mr/src/main/data/junit-data.tsv b/interview-mr/src/main/data/junit-data.tsv
new file mode 100644
index 0000000..bba5ba7
--- /dev/null
+++ b/interview-mr/src/main/data/junit-data.tsv
@@ -0,0 +1,7 @@
+Dummy Line
+jewelry 0.504193 http://411mania.com/movies/comics-411-03-26-14-favorite-marvel-superhero-team-edition/ http://411mania.com/movies/comics-411-03-26-14-favorite-marvel-superhero-team-edition/ Sovrn
+literature-language 0.556966 http://411mania.com/wrestling/411-fact-or-fiction-09-06-12-punkheyman-d-bry-kane-hug-it-out-more/ http://411mania.com/wrestling/411-fact-or-fiction-09-06-12-punkheyman-d-bry-kane-hug-it-out-more/ Sovrn
+education 0.931712 http://footballscoop.com/news/brady-hoke-let-go-michigan/ brady-hoke-let-go-michigan/ Sovrn
+jewelry jewelry http://411mania.com/movies/comics-411-03-26-14-favorite-marvel-superhero-team-edition/ http://411mania.com/movies/comics-411-03-26-14-favorite-marvel-superhero-team-edition/ Sovrn
+internet 0.897649 http://starcasm.net/archives/147255 http://starcasm.net/archives/147255 Sovrn
+sports 0.506808 http://totalfratmove.com/senior-uga-wide-receiver-chris-conley-trolls-freshman-running-back-nick-chubb-on-twitter/ http://totalfratmove.com/senior-uga-wide-receiver-chris-conley-trolls-freshman-running-back-nick-chubb-on-twitter/ Sovrn
diff --git a/interview-mr/src/main/java/com/sovrn/interview/mr/AverageScore.java b/interview-mr/src/main/java/com/sovrn/interview/mr/AverageScore.java
index 9cf90fc..6f9af83 100644
--- a/interview-mr/src/main/java/com/sovrn/interview/mr/AverageScore.java
+++ b/interview-mr/src/main/java/com/sovrn/interview/mr/AverageScore.java
@@ -1,6 +1,8 @@
package com.sovrn.interview.mr;
import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -19,7 +21,7 @@
* 2.) The score is the 2nd column of the data file
* 3.) The normalized URL is the 4th column of the data file.
*
- * The tab seperated data file can be found under src/main/data/data.tsv
+ * The tab-separated data file can be found under src/main/data/data.tsv
*
* Example data:
*
@@ -31,38 +33,78 @@
* navigation 0.615594 http://411mania.com/games/dragon-fantasy-book-one-psn-review/ http://411mania.com/games/dragon-fantasy-book-one-psn-review/ Sovrn
*/
public class AverageScore {
+
public static void main(String[] args) throws Exception {
+ if(args.length < 2) // args[0] and args[1] are both read below, so fewer than two arguments cannot proceed
+ System.exit(2); // exit status 2 when an argument is missing
+ System.exit(mapReduce(args[0], args[1])); // args[0] is passed as inputPath, args[1] as outputPath; exit with the status mapReduce returns
+ }
+
+ public static int mapReduce(String inputPath, String outputPath) throws Exception { // configures the "Interview Averaging" job, runs it, and returns 0 or 1 (see the return below)
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "Interview Averaging");
job.setJarByClass(AverageScore.class);
job.setMapperClass(AverageScoreMapper.class);
job.setReducerClass(AverageScoreReducer.class);
- // TODO: Finish the key and output setup
+ job.setOutputKeyClass(Text.class); // job output keys are declared as Text
+ job.setOutputValueClass(FloatWritable.class); // job output values are declared as FloatWritable
- FileInputFormat.addInputPath(job, new Path(args[0]));
- FileOutputFormat.setOutputPath(job, new Path(args[1]));
+ FileInputFormat.addInputPath(job, new Path(inputPath)); // input location now comes from the inputPath parameter instead of args[0]
+ FileOutputFormat.setOutputPath(job, new Path(outputPath)); // output location now comes from the outputPath parameter instead of args[1]
- System.exit(job.waitForCompletion(true) ? 0 : 1);
+ return job.waitForCompletion(true) ? 0 : 1; // 0 when waitForCompletion(true) returns true, otherwise 1; returned instead of calling System.exit so the caller receives the status
}
public static class AverageScoreMapper extends Mapper