Coding from Question 1 - 10 covering the whole course.

Final: Question 1

Please download the Enron email dataset enron.zip, unzip it and then restore it using mongorestore. It should restore to a collection called "messages" in a database called "enron". Note that this is an abbreviated version of the full corpus. There should be 120,477 documents after restore.

Inspect a few of the documents to get a basic understanding of the structure. Enron was an American corporation that engaged in a widespread accounting fraud and subsequently failed.

In this dataset, each document is an email message. Like all Email messages, there is one sender but there can be multiple recipients.

Construct a query to calculate the number of messages sent by Andrew Fastow, CFO, to Jeff Skilling, the president. Andrew Fastow's email address was andrew.fastow@enron.com. Jeff Skilling's email was jeff.skilling@enron.com.

For reference, the number of email messages from Andrew Fastow to John Lavorato (john.lavorato@enron.com) was 1.

// Count the messages sent by Andrew Fastow to Jeff Skilling.
// The pipeline is passed as a single array; the stage-per-argument form is
// not accepted by current shells. Filtering happens first so that only
// candidate messages are projected and unwound.
db.messages.aggregate([
    {$match: {'headers.From': 'andrew.fastow@enron.com', 'headers.To': 'jeff.skilling@enron.com'}},
    {$project: {'headers.From': 1, 'headers.To': 1}},
    // One document per recipient, then keep only the recipient of interest.
    {$unwind: '$headers.To'},
    {$match: {'headers.To': 'jeff.skilling@enron.com'}},
    {$group: {_id: {from: '$headers.From', to: '$headers.To'}, count: {$sum: 1}}}
])

Please use the Enron dataset you imported for the previous problem. For this question you will use the aggregation framework to figure out pairs of people that tend to communicate a lot. To do this, you will need to unwind the To list for each message.

This problem is a little tricky because a recipient may appear more than once in the To list for a message. You will need to fix that in a stage of the aggregation before doing your grouping and counting of (sender, recipient) pairs.

Which pair of people have the greatest number of messages in the dataset?

var client = require('mongodb').MongoClient;
 
// Candidate (sender, recipient) pairs to count messages for.
var pairs = [
    {from:'susan.mara@enron.com', to:'jeff.dasovich@enron.com'},
    {from:'susan.mara@enron.com', to:'richard.shapiro@enron.com'},
    {from:'soblander@carrfut.com', to:'soblander@carrfut.com'},
    {from:'susan.mara@enron.com', to:'james.steffes@enron.com'},
    {from:'evelyn.metoyer@enron.com', to:'kate.symes@enron.com'},
    {from:'susan.mara@enron.com', to:'alan.comnes@enron.com'}
];

// Runs one aggregation per pair, prints each result, and closes the
// connection once every aggregation has reported back.
client.connect('mongodb://localhost:27017/enron', function(err, db) {
    if (err) throw err;

    // Number of aggregations whose results are still outstanding.
    var pending = pairs.length;

    // Nothing to run: close right away instead of leaving the connection open.
    if (pending === 0) return db.close();

    var messages = db.collection('messages');

    pairs.forEach(function(pair) {
        var pipeline = [
            // Filter first so only candidate messages are projected and unwound.
            {$match:{'headers.From': pair.from, 'headers.To': pair.to}},
            {$project:{'headers.From':1, 'headers.To':1}},
            {$unwind:'$headers.To'},
            {$match:{'headers.To': pair.to}},
            // Collapse repeated recipients within a single message: group by
            // message _id and collect the recipient into a set.
            {$group:{
                _id:{id: '$_id', from:'$headers.From'},
                to:{$addToSet: '$headers.To'}
            }},
            {$unwind:'$to'},
            // Count the de-duplicated (sender, recipient) occurrences.
            {$group:{
                _id:{from:'$_id.from', to:'$to'},
                count: {$sum: 1}
            }}
        ];

        messages.aggregate(pipeline, function(err, result) {
            // Surface aggregation failures instead of printing "undefined".
            if (err) throw err;

            console.dir(result);

            // close DB after results from all queries are complete
            pending--;
            if (pending === 0) db.close();
        });
    });
});

Final: Question 3 In this problem you will update a document in the Enron dataset to illustrate your mastery of updating documents from the shell.

Please add the email address "mrpotatohead@mongodb.com" to the list of addresses in the "headers.To" array for the document with "headers.Message-ID" of "<8147308.1075851042335.JavaMail.evans@thyme>"

After you have completed that task, please download Final3.zip and run final3-validate.js to get the validation code and put it in the box below without any extra spaces. The validation script assumes that it is connecting to a simple mongo instance on the standard port on localhost.

var MongoClient = require('mongodb').MongoClient;
 
// Adds "mrpotatohead@mongodb.com" to the "headers.To" array of the message
// identified by its "headers.Message-ID", prints the update result and closes
// the connection.
MongoClient.connect('mongodb://localhost:27017/enron', function(err, db) {
    if(err) throw err;

    db.collection('messages', function(err, collection) {
        if(err) throw err;

        collection.update(
            {'headers.Message-ID':'<8147308.1075851042335.JavaMail.evans@thyme>'},
            // $addToSet rather than $push: running the script a second time
            // must not append the same recipient again.
            { $addToSet : {'headers.To': 'mrpotatohead@mongodb.com'}},
            function(err, result){
                if(err) throw err;
                console.log(result);
                return db.close();
            });

    });
});

Final: Question 4

Enhancing the Blog to support viewers liking certain comments. In this problem, you will be enhancing the blog project to support users liking certain comments and the like counts showing up in the permalink page.

Start by downloading the code in Final4.zip and loading up the blog dataset posts.json. The user interface has already been implemented for you. It's not fancy. The /post URL shows the like counts next to each comment and displays a Like button that you can click on. That Like button POSTS to the /like URL on the blog, makes the necessary changes to the database state (you are implementing this), and then redirects the browser back to the permalink page.

This full round trip and redisplay of the entire web page is not how you would implement liking in a modern web app, but it makes it easier for us to reason about, so we will go with it.

Your job is to search the code for the string "XXX work here" and make any necessary changes. You can choose whatever schema you want, but you should note that the entry_template makes some assumptions about how the like value will be encoded, and if you go with a different convention than it assumes, you will need to make some adjustments.

The validation script does not look at the database. It looks at the blog.

The validation script, final4-validate.js, will fetch your blog, go to the first post's permalink page and attempt to increment the vote count.

/**
 * Data-access object for the "posts" collection of the blog.
 *
 * The PostsDAO must be constructed with a connected database object; the
 * only thing it uses from it is `db.collection("posts")`.
 *
 * Every method takes a Node-style callback `callback(err, value)`; on failure
 * `value` is null.
 */
function PostsDAO(db) {
    "use strict";

    /* If this constructor is called without the "new" operator, "this" is not
     * a PostsDAO instance. Log a warning and call it correctly. */
    if (false === (this instanceof PostsDAO)) {
        console.log('Warning: PostsDAO constructor called without "new" operator');
        return new PostsDAO(db);
    }

    var posts = db.collection("posts");

    /* Shared by getPosts and getPostsByTag: fetch up to `num` posts matching
     * `query`, newest first, and hand them to `callback`. */
    function findRecent(query, num, callback) {
        posts.find(query).sort('date', -1).limit(num).toArray(function (err, items) {
            if (err) return callback(err, null);

            console.log("Found " + items.length + " posts");

            callback(err, items);
        });
    }

    /* Insert a new post with an empty comment list and the current date.
     * Calls back with the permalink derived from the title. */
    this.insertEntry = function (title, body, tags, author, callback) {
        console.log("inserting blog entry" + title + body);

        // fix up the permalink to not include whitespace
        var permalink = title.replace( /\s/g, '_' );
        // ...and drop every remaining non-word character
        permalink = permalink.replace( /\W/g, '' );

        // Build a new post
        var post = {"title": title,
                "author": author,
                "body": body,
                "permalink": permalink,
                "tags": tags,
                "comments": [],
                "date": new Date()};

        // now insert the post
        posts.insert(post, function (err, result) {
            if (err) return callback(err, null);

            console.log("Inserted new post");
            callback(err, permalink);
        });
    };

    /* Call back with up to `num` posts, newest first. */
    this.getPosts = function(num, callback) {
        findRecent({}, num, callback);
    };

    /* Call back with up to `num` posts carrying `tag`, newest first. */
    this.getPostsByTag = function(tag, num, callback) {
        findRecent({ tags : tag }, num, callback);
    };

    /* Call back with the post stored under `permalink`, or with null when no
     * such post exists. Every comment of the returned post is given a
     * "num_likes" value (0 when absent) and its position as "comment_ordinal". */
    this.getPostByPermalink = function(permalink, callback) {
        posts.findOne({'permalink': permalink}, function(err, post) {
            if (err) return callback(err, null);

            // No post under this permalink: report "not found" to the caller
            // instead of dereferencing null below.
            if (!post) return callback(null, null);

            // XXX: Look here for final exam to see where we store "num_likes"

            // fix up likes values. set to zero if data is not present
            if (typeof post.comments === 'undefined') {
                post.comments = [];
            }

            // Each comment document in a post should have a "num_likes" entry, so we have to
            // iterate all the comments in the post to make sure that is the case
            for (var i = 0; i < post.comments.length; i++) {
                if (typeof post.comments[i].num_likes === 'undefined') {
                    post.comments[i].num_likes = 0;
                }
                post.comments[i].comment_ordinal = i;
            }
            callback(err, post);
        });
    };

    /* Append a comment to the post stored under `permalink`. The "email" field
     * is only stored when an email was actually supplied. Calls back with the
     * value reported by the update. */
    this.addComment = function(permalink, name, email, body, callback) {
        var comment = {'author': name, 'body': body};

        if (email != null && email !== "") {
            comment['email'] = email;
        }

        posts.update({'permalink': permalink}, {'$push': {'comments': comment}}, function(err, numModified) {
            if (err) return callback(err, null);

            callback(err, numModified);
        });
    };

    /* Increment "num_likes" of the comment at position `comment_ordinal`
     * (zero-based) of the post stored under `permalink`. Calls back with an
     * Error, and performs no update, when `comment_ordinal` is not a
     * non-negative integer. */
    this.incrementLikes = function(permalink, comment_ordinal, callback) {
        // The ordinal becomes part of a field path, so only plain decimal
        // digits are accepted; anything else could address another field.
        var ordinalText = String(comment_ordinal);
        if (!/^\d+$/.test(ordinalText)) {
            return callback(new Error("invalid comment ordinal: " + ordinalText), null);
        }
        var ordinal = parseInt(ordinalText, 10);

        // A literal object cannot have a computed field name here, so the
        // selector is built by assignment, e.g. { 'comments.0.num_likes': 1 }.
        var selector = {};
        selector['comments.' + ordinal + '.num_likes'] = 1;

        posts.update(
            {'permalink': permalink},
            { '$inc' : selector },
            function(err, post) {
                if (err) return callback(err, null);
                console.dir(post);
                callback(err, post);
            });
    };
}
 
module.exports.PostsDAO = PostsDAO;

Final: Question 5

Suppose you have a collection fubar with the following indexes created:

[
    {
        "v" : 1,
        "key" : {
            "_id" : 1
        },
        "ns" : "test.fubar",
        "name" : "_id_"
    },
    {
        "v" : 1,
        "key" : {
            "a" : 1,
            "b" : 1
        },
        "ns" : "test.fubar",
        "name" : "a_1_b_1"
    },
    {
        "v" : 1,
        "key" : {
            "a" : 1,
            "c" : 1
        },
        "ns" : "test.fubar",
        "name" : "a_1_c_1"
    },
    {
        "v" : 1,
        "key" : {
            "c" : 1
        },
        "ns" : "test.fubar",
        "name" : "c_1"
    },
    {
        "v" : 1,
        "key" : {
            "a" : 1,
            "b" : 1,
            "c" : -1
        },
        "ns" : "test.fubar",
        "name" : "a_1_b_1_c_-1"
    }
]
 
Now suppose you want to run the following query against the collection.
 
// Range filter on a and b, returning only a and c (plus _id), sorted by c descending.
db.fubar.find(
    {a: {$lt: 10000}, b: {$gt: 5000}},
    {a: 1, c: 1}
).sort({c: -1})
 
Which of the following indexes could be used by MongoDB to assist in answering the query? Check all that apply.
 
a_1_b_1
a_1_c_1
c_1
a_1_b_1_c_-1

Final: Question 7

You have been tasked to clean up a photosharing database. The database consists of two collections, albums and images. Every image is supposed to be in an album, but there are orphan images that appear in no album. Here are some example documents (not from the collections you will be downloading).

> db.albums.findOne()
{
    "_id" : 67,
    "images" : [
        4745,
        7651,
        15247,
        17517,
        17853,
        20529,
        22640,
        27299,
        27997,
        32930,
        35591,
        48969,
        52901,
        57320,
        96342,
        99705
    ]
}
 
> db.images.findOne()
{ "_id" : 99705, "height" : 480, "width" : 640, "tags" : [ "dogs", "kittens", "work" ] }

From the above, you can conclude that the image with _id = 99705 is in album 67. It is not an orphan.

Your task is to write a program to remove every image from the images collection that appears in no album. Or put another way, if an image does not appear in at least one album, it's an orphan and should be removed from the images collection.

Download and unzip Final7.zip and use mongoimport to import the collections in albums.json and images.json.

When you are done removing the orphan images from the collection, there should be 89,737 documents in the images collection. To prove you did it correctly, what is the total number of images with the tag 'kittens' after the removal of orphans? As a sanity check, there are 49,932 images that are tagged 'kittens' before you remove the images. Hint: you might consider creating an index or two or your program will take a long time to run.

//# 1 code
// Removes every image that is referenced by no album and prints how many
// images were removed.

// "use photoshare" is an interactive-shell helper, not JavaScript, so it does
// not work in a script file; getSiblingDB works in both.
db = db.getSiblingDB('photoshare');

// Index the albums' image lists so each membership lookup below can use an
// index instead of scanning every album.
db.albums.ensureIndex({'images':1});

var removed = 0;

// Only the _id of each image is needed.
db.images.find({}, {_id: 1}).forEach(function (image) {
    // findOne returns as soon as one album references the image, which is
    // all an existence check needs.
    var album = db.albums.findOne({images: image._id}, {_id: 1});
    if (album === null) {
        db.images.remove({_id: image._id});
        removed++;
    }
});

print('orphan images removed: ' + removed);
 
//#2 code
var client = require('mongodb').MongoClient;
 
// Removes every image that no album references, then closes the connection
// once all images have been checked.
client.connect('mongodb://localhost:27017/photos', function(err, db) {
    if (err) throw err;

    var albums = db.collection('albums');
    var images = db.collection('images');

    // Only the _id of each image is needed for the album lookup.
    images.find({}, {'_id':true}, function(err, cursor) {
        if (err) throw err;

        cursor.count(function(err, total) {
            if (err) throw err;

            console.dir('num images: ' + total);

            // Images whose check (and removal, if any) has not finished yet.
            var remaining = total;

            // Empty collection: no callback below would ever close the connection.
            if (remaining === 0) return db.close();

            // Record one finished image; close the connection after the last one.
            function settle() {
                remaining--;
                console.dir('count: ' + remaining);
                if (remaining === 0) db.close();
            }

            // iterate over each image
            cursor.each(function(err, item) {
                if (err) throw err;

                // A null item signals the end of the cursor.
                if (item === null) return;

                // attempt to find an album containing the photo, if not prune
                albums.findOne({images:item._id}, function(err, album) {
                    if (err) throw err;

                    if (album == null) {
                        images.remove({'_id':item._id}, function(err, numRemoved) {
                            if (err) throw err;

                            settle();
                        });
                    } else {
                        settle();
                    }
                });
            });
        });
    });
});
 
// Number of images tagged 'kittens'
db.images.count({tags: 'kittens'})

Leave a Comment

Fields with * are required.

Please enter the letters as they are shown in the image above.
Letters are not case-sensitive.