Merge branch 'facebook-spiderable-411' into devel. Fixes #411

This commit is contained in:
Nick Martin
2012-10-29 19:14:52 -07:00

View File

@@ -2,22 +2,30 @@
var fs = __meteor_bootstrap__.require('fs');
var spawn = __meteor_bootstrap__.require('child_process').spawn;
var querystring = __meteor_bootstrap__.require('querystring');
var urlParser = __meteor_bootstrap__.require('url');
var app = __meteor_bootstrap__.app;
// list of bot user agents that we want to serve statically, but do
// not obey the _escaped_fragment_ protocol. The page is served
// statically to any client whos user agent matches any of these
// regexps. (possibly make this list configurable by user).
var AGENTS = [/^facebookexternalhit/];
// how long to let phantomjs run before we kill it
var REQUEST_TIMEOUT = 15*1000;
app.use(function (req, res, next) {
if (/\?.*_escaped_fragment_=/.test(req.url)) {
// get escaped fragment out of the url.
var idx = req.url.indexOf('?');
var preQuery = req.url.substr(0, idx);
var queryStr = req.url.substr(idx + 1);
var parsed = querystring.parse(queryStr);
delete parsed['_escaped_fragment_'];
var newQuery = querystring.stringify(parsed);
var newPath = preQuery + (newQuery ? "?" + newQuery : "");
var url = "http://" + req.headers.host + newPath;
if (/\?.*_escaped_fragment_=/.test(req.url) ||
_.any(AGENTS, function (re) {
return re.test(req.headers['user-agent']); })) {
// reassembling url without escaped fragment if exists
var parsedUrl = urlParser.parse(req.url);
var parsedQuery = querystring.parse(parsedUrl.query);
delete parsedQuery['_escaped_fragment_'];
var newQuery = querystring.stringify(parsedQuery);
var newPath = parsedUrl.pathname + (newQuery ? ('?' + newQuery) : '');
var url = "http://" + req.headers.host + newPath;
// run phantomjs
//
@@ -38,7 +46,7 @@
});
cp.on('exit', function (code) {
if (0 === code && /<html>/i.test(data)) {
if (0 === code && /<html/i.test(data)) {
res.writeHead(200, {'Content-Type': 'text/html; charset=UTF-8'});
res.end(data);
} else {