diff --git a/packages/spiderable/spiderable.js b/packages/spiderable/spiderable.js index 7d97505e62..30e81e28e6 100644 --- a/packages/spiderable/spiderable.js +++ b/packages/spiderable/spiderable.js @@ -2,22 +2,30 @@ var fs = __meteor_bootstrap__.require('fs'); var spawn = __meteor_bootstrap__.require('child_process').spawn; var querystring = __meteor_bootstrap__.require('querystring'); + var urlParser = __meteor_bootstrap__.require('url'); var app = __meteor_bootstrap__.app; + // list of bot user agents that we want to serve statically, but do + // not obey the _escaped_fragment_ protocol. The page is served + // statically to any client whos user agent matches any of these + // regexps. (possibly make this list configurable by user). + var AGENTS = [/^facebookexternalhit/]; + // how long to let phantomjs run before we kill it var REQUEST_TIMEOUT = 15*1000; app.use(function (req, res, next) { - if (/\?.*_escaped_fragment_=/.test(req.url)) { - // get escaped fragment out of the url. - var idx = req.url.indexOf('?'); - var preQuery = req.url.substr(0, idx); - var queryStr = req.url.substr(idx + 1); - var parsed = querystring.parse(queryStr); - delete parsed['_escaped_fragment_']; - var newQuery = querystring.stringify(parsed); - var newPath = preQuery + (newQuery ? "?" + newQuery : ""); - var url = "http://" + req.headers.host + newPath; + if (/\?.*_escaped_fragment_=/.test(req.url) || + _.any(AGENTS, function (re) { + return re.test(req.headers['user-agent']); })) { + + // reassembling url without escaped fragment if exists + var parsedUrl = urlParser.parse(req.url); + var parsedQuery = querystring.parse(parsedUrl.query); + delete parsedQuery['_escaped_fragment_']; + var newQuery = querystring.stringify(parsedQuery); + var newPath = parsedUrl.pathname + (newQuery ? ('?' + newQuery) : ''); + var url = "http://" + req.headers.host + newPath; // run phantomjs // @@ -38,7 +46,7 @@ }); cp.on('exit', function (code) { - if (0 === code && //i.test(data)) { + if (0 === code && /