Final: Question 1
Please download the Enron email dataset enron.zip, unzip it and then restore it using mongorestore. It should restore to a collection called "messages" in a database called "enron". Note that this is an abbreviated version of the full corpus. There should be 120,477 documents after restore.
Inspect a few of the documents to get a basic understanding of the structure. Enron was an American corporation that engaged in a widespread accounting fraud and subsequently failed.
In this dataset, each document is an email message. Like all Email messages, there is one sender but there can be multiple recipients.
Construct a query to calculate the number of messages sent by Andrew Fastow, CFO, to Jeff Skilling, the president. Andrew Fastow's email address was andrew.fastow@enron.com. Jeff Skilling's email was jeff.skilling@enron.com.
For reference, the number of email messages from Andrew Fastow to John Lavorato (john.lavorato@enron.com) was 1.
// Final Question 1 (mongo shell): count the messages Andrew Fastow sent to
// Jeff Skilling.
//
// Matching 'headers.To' against a single value selects every message whose To
// array contains that address, so a message is counted exactly once even when
// the recipient is listed several times. Unwinding the To array first would
// instead count one row per occurrence of the address.
db.messages.aggregate([
  {$match: {
    'headers.From': 'andrew.fastow@enron.com',
    'headers.To': 'jeff.skilling@enron.com'
  }},
  // Both addresses are fixed by the $match above, so the group key spells them
  // out as literals; this keeps the {from, to} shape of the result document.
  {$group: {
    _id: {from: 'andrew.fastow@enron.com', to: 'jeff.skilling@enron.com'},
    count: {$sum: 1}
  }}
])
Please use the Enron dataset you imported for the previous problem. For this question you will use the aggregation framework to figure out pairs of people that tend to communicate a lot. To do this, you will need to unwind the To list for each message.
This problem is a little tricky because a recipient may appear more than once in the To list for a message. You will need to fix that in a stage of the aggregation before doing your grouping and counting of (sender, recipient) pairs.
Which pair of people have the greatest number of messages in the dataset?
// Final Question 2 (Node.js): for each candidate (sender, recipient) pair,
// count the messages the sender addressed to the recipient. A message is
// counted once even if the recipient appears several times in its To list.
var client = require('mongodb').MongoClient;

// Candidate (sender, recipient) pairs to compare.
var pairs = [
  {from: 'susan.mara@enron.com', to: 'jeff.dasovich@enron.com'},
  {from: 'susan.mara@enron.com', to: 'richard.shapiro@enron.com'},
  {from: 'soblander@carrfut.com', to: 'soblander@carrfut.com'},
  {from: 'susan.mara@enron.com', to: 'james.steffes@enron.com'},
  {from: 'evelyn.metoyer@enron.com', to: 'kate.symes@enron.com'},
  {from: 'susan.mara@enron.com', to: 'alan.comnes@enron.com'}
];

/**
 * Builds the aggregation pipeline that counts messages from pair.from to
 * pair.to.
 *
 * @param {{from: string, to: string}} pair - sender and recipient addresses.
 * @returns {Array<Object>} pipeline stages for collection.aggregate().
 */
function buildPipeline(pair) {
  return [
    {$project: {'headers.From': 1, 'headers.To': 1}},
    {$match: {'headers.From': pair.from}},
    {$unwind: '$headers.To'},
    {$match: {'headers.To': pair.to}},
    // Regroup per message with $addToSet so a recipient repeated in one
    // message's To list collapses to a single entry before counting.
    {$group: {
      _id: {id: '$_id', from: '$headers.From'},
      to: {$addToSet: '$headers.To'}
    }},
    {$unwind: '$to'},
    {$group: {
      _id: {from: '$_id.from', to: '$to'},
      count: {$sum: 1}
    }}
  ];
}

client.connect('mongodb://localhost:27017/enron', function(err, db) {
  if (err) throw err;

  // Look the collection up once; every pair queries the same collection.
  db.collection('messages', function(err, collection) {
    if (err) throw err;

    // Number of aggregations still in flight; the connection is closed when
    // the last one reports back.
    var remaining = pairs.length;

    pairs.forEach(function(pair) {
      collection.aggregate(buildPipeline(pair), function(err, result) {
        // Surface failures instead of printing an undefined result.
        if (err) throw err;
        console.dir(result);
        remaining--;
        if (remaining === 0) db.close();
      });
    });
  });
});
Final: Question 3
In this problem you will update a document in the Enron dataset to illustrate your mastery of updating documents from the shell.
Please add the email address "mrpotatohead@mongodb.com" to the list of addresses in the "headers.To" array for the document with "headers.Message-ID" of "<8147308.1075851042335.JavaMail.evans@thyme>"
After you have completed that task, please download Final3.zip and run final3-validate.js to get the validation code and put it in the box below without any extra spaces. The validation script assumes that it is connecting to a simple mongo instance on the standard port on localhost.
// Final Question 3 (Node.js): append one address to the headers.To array of a
// single Enron message, print the driver's result and close the connection.
var MongoClient = require('mongodb').MongoClient;

MongoClient.connect('mongodb://localhost:27017/enron', function(connectErr, db) {
  if (connectErr) throw connectErr;

  // The message to modify, identified by its Message-ID header.
  var target = {'headers.Message-ID': '<8147308.1075851042335.JavaMail.evans@thyme>'};
  // $push appends the new recipient to the existing To array.
  var change = {$push: {'headers.To': 'mrpotatohead@mongodb.com'}};

  db.collection('messages', function(lookupErr, messages) {
    if (lookupErr) throw lookupErr;

    messages.update(target, change, function(updateErr, outcome) {
      if (updateErr) throw updateErr;
      console.log(outcome);
      return db.close();
    });
  });
});
Final: Question 4
Enhancing the Blog to support viewers liking certain comments
In this problem, you will be enhancing the blog project to support users liking certain comments and the like counts showing up in the permalink page.
Start by downloading the code in Final4.zip and loading up the blog dataset posts.json. The user interface has already been implemented for you. It's not fancy. The /post URL shows the like counts next to each comment and displays a Like button that you can click on. That Like button POSTS to the /like URL on the blog, makes the necessary changes to the database state (you are implementing this), and then redirects the browser back to the permalink page.
This full round trip and redisplay of the entire web page is not how you would implement liking in a modern web app, but it makes it easier for us to reason about, so we will go with it.
Your job is to search the code for the string "XXX work here" and make any necessary changes. You can choose whatever schema you want, but you should note that the entry_template makes some assumptions about how the like value will be encoded, and if you go with a different convention than it assumes, you will need to make some adjustments.
The validation script does not look at the database. It looks at the blog.
The validation script, final4-validate.js, will fetch your blog, go to the first post's permalink page and attempt to increment the vote count.
/**
 * Data-access object for the blog's "posts" collection.
 *
 * Every method is asynchronous and reports through a Node-style
 * callback(err, result).
 *
 * @param {Object} db - database handle exposing collection(name).
 */
function PostsDAO(db) {
    "use strict";

    // Tolerate being called without "new": warn and construct properly.
    if (false === (this instanceof PostsDAO)) {
        console.log('Warning: PostsDAO constructor called without "new" operator');
        return new PostsDAO(db);
    }

    var posts = db.collection("posts");

    /**
     * Inserts a new post with an empty comment list and the current date.
     *
     * @param {string} title
     * @param {string} body
     * @param {*} tags - stored as given under "tags".
     * @param {*} author - stored as given under "author".
     * @param {function(Error, string)} callback - receives the generated permalink.
     */
    this.insertEntry = function (title, body, tags, author, callback) {
        console.log("inserting blog entry" + title + body);

        // Permalink: whitespace becomes "_", then every non-word character is dropped.
        var permalink = title.replace(/\s/g, '_');
        permalink = permalink.replace(/\W/g, '');

        var post = {
            "title": title,
            "author": author,
            "body": body,
            "permalink": permalink,
            "tags": tags,
            "comments": [],
            "date": new Date()
        };

        posts.insert(post, function (err) {
            if (err) return callback(err, null);
            console.log("Inserted new post");
            callback(err, permalink);
        });
    };

    /**
     * Fetches up to num posts, newest first.
     *
     * @param {number} num - maximum number of posts.
     * @param {function(Error, Array)} callback
     */
    this.getPosts = function (num, callback) {
        posts.find().sort('date', -1).limit(num).toArray(function (err, items) {
            if (err) return callback(err, null);
            console.log("Found " + items.length + " posts");
            callback(err, items);
        });
    };

    /**
     * Fetches up to num posts carrying the given tag, newest first.
     *
     * @param {string} tag
     * @param {number} num - maximum number of posts.
     * @param {function(Error, Array)} callback
     */
    this.getPostsByTag = function (tag, num, callback) {
        posts.find({ tags: tag }).sort('date', -1).limit(num).toArray(function (err, items) {
            if (err) return callback(err, null);
            console.log("Found " + items.length + " posts");
            callback(err, items);
        });
    };

    /**
     * Fetches one post by permalink and normalizes its comments: a missing
     * comments array becomes [], a missing num_likes becomes 0, and each
     * comment gets its array index as comment_ordinal.
     *
     * @param {string} permalink
     * @param {function(Error, Object)} callback - receives the post, or null
     *     when no post has that permalink.
     */
    this.getPostByPermalink = function (permalink, callback) {
        posts.findOne({'permalink': permalink}, function (err, post) {
            if (err) return callback(err, null);

            // No matching document: hand null to the caller instead of
            // dereferencing it below.
            if (!post) return callback(err, null);

            if (typeof post.comments === 'undefined') {
                post.comments = [];
            }

            for (var i = 0; i < post.comments.length; i++) {
                if (typeof post.comments[i].num_likes === 'undefined') {
                    post.comments[i].num_likes = 0;
                }
                post.comments[i].comment_ordinal = i;
            }

            callback(err, post);
        });
    };

    /**
     * Appends a comment to the post with the given permalink.
     *
     * @param {string} permalink
     * @param {string} name - stored as the comment's author.
     * @param {string} email - stored only when it is not the empty string.
     * @param {string} body
     * @param {function(Error, *)} callback - receives the update's result value.
     */
    this.addComment = function (permalink, name, email, body, callback) {
        var comment = {'author': name, 'body': body};

        if (email !== "") {
            comment['email'] = email;
        }

        posts.update({'permalink': permalink}, {'$push': {'comments': comment}}, function (err, numModified) {
            if (err) return callback(err, null);
            callback(err, numModified);
        });
    };

    /**
     * Increments num_likes on the comment at position comment_ordinal of the
     * post with the given permalink.
     *
     * @param {string} permalink
     * @param {number|string} comment_ordinal - zero-based index into comments.
     * @param {function(Error, *)} callback - receives the update's result
     *     value, or an Error when comment_ordinal is not a non-negative integer.
     */
    this.incrementLikes = function (permalink, comment_ordinal, callback) {
        // comment_ordinal is concatenated into a field path below, so accept
        // only a plain non-negative integer; anything else (e.g. "0.author")
        // would address a different field.
        if (!/^\d+$/.test(String(comment_ordinal))) {
            return callback(new Error("invalid comment ordinal: " + comment_ordinal), null);
        }

        var selector = {};
        selector['comments.' + comment_ordinal + '.num_likes'] = 1;

        posts.update(
            {'permalink': permalink},
            {'$inc': selector},
            function (err, post) {
                if (err) return callback(err, null);
                console.dir(post);
                callback(err, post);
            });
    };
}
// Export the constructor as the PostsDAO property of this module.
module.exports.PostsDAO = PostsDAO;
Final: Question 5
Suppose you have a collection fubar with the following indexes created:
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"ns" : "test.fubar",
"name" : "_id_"
},
{
"v" : 1,
"key" : {
"a" : 1,
"b" : 1
},
"ns" : "test.fubar",
"name" : "a_1_b_1"
},
{
"v" : 1,
"key" : {
"a" : 1,
"c" : 1
},
"ns" : "test.fubar",
"name" : "a_1_c_1"
},
{
"v" : 1,
"key" : {
"c" : 1
},
"ns" : "test.fubar",
"name" : "c_1"
},
{
"v" : 1,
"key" : {
"a" : 1,
"b" : 1,
"c" : -1
},
"ns" : "test.fubar",
"name" : "a_1_b_1_c_-1"
}
]
Now suppose you want to run the following query against the collection.
db.fubar.find({'a':{'$lt':10000}, 'b':{'$gt': 5000}}, {'a':1, 'c':1}).sort({'c':-1})
Which of the following indexes could be used by MongoDB to assist in answering the query? Check all that apply.
a_1_b_1
a_1_c_1
c_1
a_1_b_1_c_-1
Final: Question 7
You have been tasked to cleanup a photosharing database. The database consists of two collections, albums, and images. Every image is supposed to be in an album, but there are orphan images that appear in no album. Here are some example documents (not from the collections you will be downloading).
> db.albums.findOne()
{
"_id" : 67,
"images" : [
4745,
7651,
15247,
17517,
17853,
20529,
22640,
27299,
27997,
32930,
35591,
48969,
52901,
57320,
96342,
99705
]
}
> db.images.findOne()
{ "_id" : 99705, "height" : 480, "width" : 640, "tags" : [ "dogs", "kittens", "work" ] }
From the above, you can conclude that the image with _id = 99705 is in album 67. It is not an orphan.
Your task is to write a program to remove every image from the images collection that appears in no album. Or put another way, if an image does not appear in at least one album, it's an orphan and should be removed from the images collection.
Download and unzip Final7.zip and use mongoimport to import the collections in albums.json and images.json.
When you are done removing the orphan images from the collection, there should be 89,737 documents in the images collection. To prove you did it correctly, what is the total number of images with the tag 'kittens' after the removal of orphans? As a sanity check, there are 49,932 images that are tagged 'kittens' before you remove the images.
Hint: you might consider creating an index or two or your program will take a long time to run.
// Final Question 7 (mongo shell): delete every image that no album references.
//
// getSiblingDB() replaces the interactive-only "use photoshare" helper so the
// same lines are valid JavaScript and also work when loaded as a script file.
db = db.getSiblingDB('photoshare');

// Index the albums' image arrays; the loop below does one membership lookup
// per image, and the exercise hints that it is slow without an index.
db.albums.ensureIndex({'images': 1});

var removed = 0;
var cur = db.images.find();
while (cur.hasNext()) {
  var imageId = cur.next()._id;
  // An image is an orphan when no album lists its _id.
  var albumCount = db.albums.find({images: imageId}).count();
  if (albumCount === 0) {
    db.images.remove({_id: imageId});
    removed++;
  }
}
// Report how many orphans were deleted.
print('orphan images removed: ' + removed);
// Final Question 7 (Node.js): delete every image that no album references,
// printing a countdown of images still to be checked and closing the
// connection once every image has been handled.
var client = require('mongodb').MongoClient;

// NOTE(review): this connects to the "photos" database while the shell version
// in this file uses "photoshare" - confirm which one holds the imported data.
client.connect('mongodb://localhost:27017/photos', function(err, db) {
  if (err) throw err;

  var albums = db.collection('albums');

  db.collection('images', function(err, images) {
    if (err) throw err;

    // Only the _id is needed to test album membership.
    images.find({}, {'_id': true}, function(err, cursor) {
      if (err) throw err;

      cursor.count(function(err, total) {
        if (err) throw err;
        console.dir('num images: ' + total);

        // Images whose check (and removal, if orphaned) has not finished yet.
        var remaining = total;

        // Nothing to check: close right away, otherwise the countdown below
        // would never reach zero and the process would hang.
        if (remaining === 0) return db.close();

        // Records one finished image and closes the connection after the last.
        function settle() {
          remaining--;
          console.dir('count: ' + remaining);
          if (remaining === 0) db.close();
        }

        cursor.each(function(err, item) {
          if (err) throw err;
          if (item == null) return; // end of the cursor

          albums.findOne({images: item._id}, function(err, doc) {
            if (err) throw err;

            // Referenced by at least one album: keep it.
            if (doc != null) return settle();

            images.remove({'_id': item._id}, function(err) {
              if (err) throw err;
              settle();
            });
          });
        });
      });
    });
  });
});
// Question 7 check (mongo shell): how many images carry the 'kittens' tag.
db.images.count({tags: 'kittens'})