Add GeoIP service.

This commit is contained in:
Brian Simpson
2014-02-18 08:13:06 -05:00
committed by Neil Williams
parent ca0001aaac
commit 20f57a17eb
5 changed files with 250 additions and 0 deletions

View File

@@ -139,6 +139,11 @@ python-kazoo
python-stripe
python-tinycss2
python-flask
geoip-bin
geoip-database
python-geoip
nodejs
node-less
gettext
@@ -547,6 +552,28 @@ fi
start sutro
###############################################################################
# geoip service
###############################################################################
# Install a gunicorn config for the geoip WSGI app, but only when one is not
# already present, so an existing /etc/gunicorn.d/geoip.conf is never
# overwritten by re-running this script.
#
# The heredoc delimiter is unquoted, so $REDDIT_HOME and $REDDIT_USER are
# expanded by the shell when the file is written; the generated config
# contains the resolved values, not the variable names.
#
# The app is bound to 127.0.0.1:5000 with a single worker, and gunicorn's
# request-line limit is raised to 8190 bytes.
if [ ! -f /etc/gunicorn.d/geoip.conf ]; then
cat > /etc/gunicorn.d/geoip.conf <<GEOIP
CONFIG = {
"mode": "wsgi",
"working_dir": "$REDDIT_HOME/reddit/scripts",
"user": "$REDDIT_USER",
"group": "$REDDIT_USER",
"args": (
"--bind=127.0.0.1:5000",
"--workers=1",
"--limit-request-line=8190",
"geoip_service:application",
),
}
GEOIP
fi
# Runs on every invocation, whether or not the config was just written.
service gunicorn start
###############################################################################
# Job Environment
###############################################################################

View File

@@ -375,6 +375,8 @@ words_file = /usr/dict/words
case_sensitive_domains = i.imgur.com, youtube.com
# whether to load reddit private code (a hack until we structure it better)
import_private = false
# location of geoip service
geoip_location = 127.0.0.1:5000
############################################ AUTHENTICATION

110
r2/r2/lib/geoip.py Normal file
View File

@@ -0,0 +1,110 @@
#!/usr/bin/python
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is reddit Inc.
#
# All portions of the code written by reddit are Copyright (c) 2006-2014 reddit
# Inc. All Rights Reserved.
###############################################################################
import datetime
import httplib
import json
import os
import socket
import urllib2
from pylons import g
from r2.lib.cache import sgm
from r2.lib.utils import in_chunks, tup
# If the geoip service has nginx in front of it there is a default limit of 8kb:
# http://wiki.nginx.org/NginxHttpCoreModule#large_client_header_buffers
# >>> len('GET /geoip/' + '+'.join(['255.255.255.255'] * 500) + ' HTTP/1.1')
# 8019
MAX_IPS_PER_GROUP = 500
GEOIP_CACHE_TIME = datetime.timedelta(days=7).total_seconds()
def _location_by_ips(ips):
    """Fetch location records for `ips` from the GeoIP service.

    This is the cache-miss function handed to `sgm` by `location_by_ips`.
    The IPs are requested in batches of at most MAX_IPS_PER_GROUP, each batch
    being sent as a single GET to `<geoip_location>/geoip/<ip>+<ip>+...`.

    A batch that cannot be fetched or decoded is logged and skipped, so the
    result may cover only part of `ips`.

    :param ips: iterable of IP address strings.
    :returns: dict merged from every successfully decoded JSON response
        (the bundled geoip service keys its response by IP), or an empty
        dict when `g.geoip_location` is not configured.
    """
    base_url = getattr(g, 'geoip_location', None)
    if not base_url:
        g.log.warning("g.geoip_location not set. skipping GeoIP lookup.")
        return {}

    # URLs are assembled with plain string joins: os.path.join is meant for
    # filesystem paths and uses the platform's separator.
    base_url = base_url.rstrip('/')
    if '://' not in base_url:
        # urllib2 cannot open a bare "host:port"; treat it as plain http.
        base_url = 'http://' + base_url

    ret = {}
    for batch in in_chunks(ips, MAX_IPS_PER_GROUP):
        url = '/'.join((base_url, 'geoip', '+'.join(batch)))

        try:
            response = urllib2.urlopen(url=url, timeout=3)
            try:
                json_data = response.read()
            finally:
                # Release the connection even if the read fails part-way.
                response.close()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
            g.log.warning("Failed to fetch GeoIP information: %r" % e)
            continue

        try:
            ret.update(json.loads(json_data))
        except ValueError as e:
            g.log.warning("Invalid JSON response for GeoIP lookup: %r" % e)
            continue

    return ret
def _organization_by_ips(ips):
    """Fetch organization names for `ips` from the GeoIP service.

    This is the cache-miss function handed to `sgm` by `organization_by_ips`.
    Duplicate IPs are dropped, then the remainder is requested in batches of
    at most MAX_IPS_PER_GROUP so the request line stays bounded, each batch
    being a single GET to `<geoip_location>/org/<ip>+<ip>+...`.

    A batch that cannot be fetched or decoded is logged and skipped, so the
    result may cover only part of `ips`.

    :param ips: iterable of IP address strings.
    :returns: dict merged from every successfully decoded JSON response
        (the bundled geoip service keys its response by IP), or an empty
        dict when `g.geoip_location` is not configured.
    """
    base_url = getattr(g, 'geoip_location', None)
    if not base_url:
        g.log.warning("g.geoip_location not set. skipping GeoIP lookup.")
        return {}

    # URLs are assembled with plain string joins: os.path.join is meant for
    # filesystem paths and uses the platform's separator.
    base_url = base_url.rstrip('/')
    if '://' not in base_url:
        # urllib2 cannot open a bare "host:port"; treat it as plain http.
        base_url = 'http://' + base_url

    ret = {}
    for batch in in_chunks(set(ips), MAX_IPS_PER_GROUP):
        url = '/'.join((base_url, 'org', '+'.join(batch)))

        try:
            response = urllib2.urlopen(url=url, timeout=3)
            try:
                json_data = response.read()
            finally:
                # Release the connection even if the read fails part-way.
                response.close()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
            # socket.error also covers a timeout raised while reading.
            g.log.warning("Failed to fetch GeoIP information: %r" % e)
            continue

        try:
            ret.update(json.loads(json_data))
        except ValueError as e:
            g.log.warning("Invalid JSON response for GeoIP lookup: %r" % e)
            continue

    return ret
def location_by_ips(ips):
    """Look up GeoIP location data for one IP or several, via the cache.

    `ips` is normalised with `tup`, which also reports whether a single
    value was passed. Lookups go through `sgm` under the 'location_by_ip'
    prefix, with `_location_by_ips` as the miss function and entries kept
    for GEOIP_CACHE_TIME seconds.

    :returns: for a single IP with a non-empty result, that IP's record;
        otherwise the whole mapping returned by `sgm` (which may be empty).
    """
    ip_list, single = tup(ips, ret_is_single=True)
    found = sgm(
        g.cache,
        ip_list,
        miss_fn=_location_by_ips,
        prefix='location_by_ip',
        time=GEOIP_CACHE_TIME,
    )

    if not (single and found):
        return found
    return found[ip_list[0]]
def organization_by_ips(ips):
    """Look up the GeoIP organization for one IP or several, via the cache.

    `ips` is normalised with `tup`, which also reports whether a single
    value was passed. Lookups go through `sgm` under the
    'organization_by_ip' prefix, with `_organization_by_ips` as the miss
    function and entries kept for GEOIP_CACHE_TIME seconds.

    :returns: for a single IP with a non-empty result, that IP's entry;
        otherwise the whole mapping returned by `sgm` (which may be empty).
    """
    ip_list, single = tup(ips, ret_is_single=True)
    found = sgm(
        g.cache,
        ip_list,
        miss_fn=_organization_by_ips,
        prefix='organization_by_ip',
        time=GEOIP_CACHE_TIME,
    )

    if not (single and found):
        return found
    return found[ip_list[0]]

98
scripts/geoip_service.py Normal file
View File

@@ -0,0 +1,98 @@
#!/usr/bin/python
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is reddit Inc.
#
# All portions of the code written by reddit are Copyright (c) 2006-2014 reddit
# Inc. All Rights Reserved.
###############################################################################
"""
This is a tiny Flask app used for geoip lookups against a maxmind database.
If you are using this service be sure to set `geoip_location` in your ini file.
"""
import json
import GeoIP
from flask import Flask, make_response
application = Flask(__name__)
# SET THESE PATHS TO YOUR MAXMIND GEOIP LEGACY DATABASES
# http://dev.maxmind.com/geoip/legacy/geolite/
COUNTRY_DB_PATH = '/usr/share/GeoIP/GeoIP.dat'
CITY_DB_PATH = '/usr/share/GeoIP/GeoIPCity.dat'
ORG_DB_PATH = '/usr/share/GeoIP/GeoIPOrg.dat'
try:
gc = GeoIP.open(COUNTRY_DB_PATH, GeoIP.GEOIP_MEMORY_CACHE)
except:
gc = None
try:
gi = GeoIP.open(CITY_DB_PATH, GeoIP.GEOIP_MEMORY_CACHE)
except:
gi = None
try:
go = GeoIP.open(ORG_DB_PATH, GeoIP.GEOIP_MEMORY_CACHE)
except:
go = None
def json_response(result):
    """Wrap `result` in a 200 response carrying UTF-8 encoded JSON.

    Byte strings inside `result` are interpreted as ISO-8859-1 when
    serialising (presumably the encoding the GeoIP legacy databases hand
    back -- confirm), and non-ASCII characters are emitted as-is rather
    than as \\u escapes before the final UTF-8 encode.
    """
    payload = json.dumps(
        result,
        ensure_ascii=False,
        encoding='iso-8859-1',
    ).encode('utf-8')

    resp = make_response(payload, 200)
    resp.headers['Content-Type'] = 'application/json; charset=utf-8'
    return resp
@application.route('/geoip/<ips>')
def get_record(ips):
    """Return location data for the '+'-separated addresses in `ips`.

    The city database is preferred and yields its full record per address.
    If only the country database is loaded, each address maps to a dict
    with 'country_code' and 'country_name'. With neither database the JSON
    body is an empty object.
    """
    addresses = ips.split('+')

    if gi:
        records = {addr: gi.record_by_addr(addr) for addr in addresses}
    elif gc:
        records = {
            addr: {
                'country_code': gc.country_code_by_addr(addr),
                'country_name': gc.country_name_by_addr(addr),
            }
            for addr in addresses
        }
    else:
        records = {}

    return json_response(records)
@application.route('/org/<ips>')
def get_organizations(ips):
    """Return the organization for each '+'-separated address in `ips`.

    When the organization database is not loaded the JSON body is an empty
    object.
    """
    if not go:
        return json_response({})

    addresses = ips.split('+')
    orgs = {addr: go.org_by_addr(addr) for addr in addresses}
    return json_response(orgs)
# When executed directly (rather than imported as `geoip_service:application`
# by a WSGI server), serve the app with Flask's built-in server.
if __name__ == "__main__":
    application.run()

View File

@@ -0,0 +1,13 @@
# Upstart job that updates the GeoIP databases and then reloads the gunicorn
# geoip app.
description "refresh the geoip databases"
# No automatic start condition; the job must be started explicitly.
manual
# One-shot job: it is expected to run to completion and exit.
task
stop on reddit-stop or runlevel [016]
# Run at reduced CPU priority.
nice 10
script
# Load the reddit environment settings before running the update.
. /etc/default/reddit
geoipupdate
# Reload the geoip gunicorn config after the update has run.
service gunicorn reload geoip.conf
end script