• Homework: Homework 4.1: queries that utilize index
  • Homework: Homework 4.2: explain index
  • Homework: Homework 4.3: add indexes to the blog
  • Homework: Homework 4.4: analyze a profile log

Homework: Homework 4.1

Suppose you have a collection with the following indexes:

> db.products.getIndexes()
[
    {
        "v" : 1,
        "key" : {
            "_id" : 1
        },
        "ns" : "store.products",
        "name" : "_id_"
    },
    {
        "v" : 1,
        "key" : {
            "sku" : 1
        },
                "unique" : true,
        "ns" : "store.products",
        "name" : "sku_1"
    },
    {
        "v" : 1,
        "key" : {
            "price" : -1
        },
        "ns" : "store.products",
        "name" : "price_-1"
    },
    {
        "v" : 1,
        "key" : {
            "description" : 1
        },
        "ns" : "store.products",
        "name" : "description_1"
    },
    {
        "v" : 1,
        "key" : {
            "category" : 1,
            "brand" : 1
        },
        "ns" : "store.products",
        "name" : "category_1_brand_1"
    },
    {
        "v" : 1,
        "key" : {
            "reviews.author" : 1
        },
        "ns" : "store.products",
        "name" : "reviews.author_1"
    }
]
  • db.products.find({$and:[{price:{$gt:30}},{price:{$lt:50}}]}).sort({brand:1})
  • db.products.find({'brand':"GE"}).sort({price:1})

    Homework: Homework 4.2

    Suppose you have a collection called tweets whose documents contain information about the created_at time of the tweet and the user's followers_count at the time they issued the tweet. What can you infer from the following explain output?

db.tweets.find({"user.followers_count":{$gt:1000}}).sort({"created_at" : 1 }).limit(10).skip(5000).explain()
{
        "cursor" : "BtreeCursor created_at_-1 reverse",
        "isMultiKey" : false,
        "n" : 10,
        "nscannedObjects" : 46462,
        "nscanned" : 46462,
        "nscannedObjectsAllPlans" : 49763,
        "nscannedAllPlans" : 49763,
        "scanAndOrder" : false,
        "indexOnly" : false,
        "nYields" : 0,
        "nChunkSkips" : 0,
        "millis" : 205,
        "indexBounds" : {
                "created_at" : [
                        [
                                {
                                        "$minElement" : 1
                                },
                                {
                                        "$maxElement" : 1
                                }
                        ]
                ]
        },
        "server" : "localhost.localdomain:27017"
}
  • This query performs a collection scan.
  • The query uses an index to determine the order in which to return result documents.
  • The query visits 46462 documents.

    Homework: Homework 4.3

    Making the Blog fast

To get started, please download hw4.tar or hw4.zip and unpack the files to your computer. Files for this homework should be included with your homework handout. This assignment requires Mongo 2.2 or above.

In this homework assignment you will be adding some indexes to the posts collection to make the blog fast.

We have provided the full code for the blog application and you don't need to make any changes, or even run the blog. But you can, for fun.

We are also providing a patriotic (if you are an American) data set for the blog. There are 1000 entries with lots of comments and tags. You must load this dataset to complete the problem.

The blog has been enhanced so that it can also display the top 10 most recent posts by tag. There are hyperlinks from the post tags to the page that displays the 10 most recent blog entries for that tag. (run the blog and it will be obvious)

Your assignment is to make the following blog pages fast:

The blog home page
The page that displays blog posts by tag (http://localhost:3000/tag/whatever)
The page that displays a blog entry by permalink (http://localhost:3000/post/permalink)

By fast, we mean that indexes should be in place to satisfy these queries such that we only need to scan the number of documents we are going to return.

To figure out what queries you need to optimize, you can read the code in posts.js and see what queries it is doing to return the data needed for the relevant pages. Isolate those queries and use explain to explore.

Once you have added the indexes to make those pages fast, run the following commands to validate your project just like in previous homeworks.

posts.js (indexes added):

/**
 * Data-access object for the blog's "posts" collection.
 *
 * The PostsDAO must be constructed with a connected database object; the
 * constructor only calls db.collection("posts") on it. Every method is
 * asynchronous and reports its outcome through a Node-style
 * callback(err, result).
 *
 * @param {Object} db - connected database handle exposing collection(name).
 */
function PostsDAO(db) {
    "use strict";

    /* If this constructor is called without the "new" operator, "this" is not
     * a PostsDAO instance. Log a warning and call it correctly. */
    if (false === (this instanceof PostsDAO)) {
        console.log('Warning: PostsDAO constructor called without "new" operator');
        return new PostsDAO(db);
    }

    var posts = db.collection("posts");

    /**
     * Build a post document and insert it into the collection.
     *
     * The permalink is derived from the title: whitespace becomes "_", then
     * every remaining non-word character is removed.
     *
     * @param {string} title - post title; also the source of the permalink.
     * @param {*} body - post body, stored as given.
     * @param {*} tags - tags, stored as given.
     * @param {*} author - author, stored as given.
     * @param {Function} callback - called with (err, null) on failure or
     *     (err, permalink) on success.
     */
    this.insertEntry = function (title, body, tags, author, callback) {
        console.log("inserting blog entry" + title + body);

        // fix up the permalink to not include whitespace
        var permalink = title.replace(/\s/g, '_');
        permalink = permalink.replace(/\W/g, '');

        // Build a new post
        var post = {
            "title": title,
            "author": author,
            "body": body,
            "permalink": permalink,
            "tags": tags,
            "comments": [],
            "date": new Date()
        };

        // now insert the post
        posts.insert(post, function (err) {
            if (err) return callback(err, null);

            console.log("Inserted new post");
            callback(err, permalink);
        });
    };

    /**
     * Fetch up to `num` posts, sorted by "date" descending.
     *
     * @param {number} num - maximum number of posts, passed to limit().
     * @param {Function} callback - called with (err, null) on failure or
     *     (err, items) on success.
     */
    this.getPosts = function (num, callback) {
        posts.find().sort('date', -1).limit(num).toArray(function (err, items) {
            if (err) return callback(err, null);

            console.log("Found " + items.length + " posts");

            callback(err, items);
        });
    };

    /**
     * Fetch up to `num` posts matching { tags: tag }, sorted by "date"
     * descending.
     *
     * @param {*} tag - value matched against the "tags" field.
     * @param {number} num - maximum number of posts, passed to limit().
     * @param {Function} callback - called with (err, null) on failure or
     *     (err, items) on success.
     */
    this.getPostsByTag = function (tag, num, callback) {
        posts.find({ tags: tag }).sort('date', -1).limit(num).toArray(function (err, items) {
            if (err) return callback(err, null);

            console.log("Found " + items.length + " posts");

            callback(err, items);
        });
    };

    /**
     * Look up a single post by its permalink.
     *
     * @param {string} permalink - value matched against the "permalink" field.
     * @param {Function} callback - called with (err, null) on failure or
     *     (err, post) on success; `post` is whatever findOne yields.
     */
    this.getPostByPermalink = function (permalink, callback) {
        posts.findOne({ 'permalink': permalink }, function (err, post) {
            if (err) return callback(err, null);

            callback(err, post);
        });
    };

    /**
     * Push a comment onto the "comments" array of the post with the given
     * permalink.
     *
     * @param {string} permalink - identifies the post to update.
     * @param {*} name - stored as the comment's "author".
     * @param {string} [email] - stored as the comment's "email" only when
     *     provided and non-empty.
     * @param {*} body - stored as the comment's "body".
     * @param {Function} callback - called with (err, null) on failure or
     *     (err, numModified) on success.
     */
    this.addComment = function (permalink, name, email, body, callback) {
        var comment = { 'author': name, 'body': body };

        // A plain `email != ""` check lets undefined/null through, which would
        // attach a meaningless "email" key to the stored comment. Only record
        // the field when a real value was supplied.
        if (email) {
            comment['email'] = email;
        }

        posts.update({ 'permalink': permalink }, { '$push': { 'comments': comment } }, function (err, numModified) {
            if (err) return callback(err, null);

            callback(err, numModified);
        });
    };
}
 
// Expose the PostsDAO constructor as a named property of this module's exports.
module.exports.PostsDAO = PostsDAO;

Homework: Homework 4.4

In this problem you will analyze a profile log taken from a MongoDB instance. A 'sysprofile.json' file should have been included with your homework files. Import this file with the following command:

mongoimport -d m101 -c profile < sysprofile.json

Now query the profile data, looking for all queries to the students collection in the database school2, sorted in order of decreasing latency.

What is the latency of the longest running operation to the collection, in milliseconds?

db.profile.find({ns:/school2.students/}).sort({millis : -1}).limit(1).pretty()

Leave a Comment

Fields with * are required.

Please enter the letters as they are shown in the image above.
Letters are not case-sensitive.