mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-01-10 07:28:03 -05:00
Add GeoIP service.
This commit is contained in:
committed by
Neil Williams
parent
ca0001aaac
commit
20f57a17eb
@@ -139,6 +139,11 @@ python-kazoo
|
||||
python-stripe
|
||||
python-tinycss2
|
||||
|
||||
python-flask
|
||||
geoip-bin
|
||||
geoip-database
|
||||
python-geoip
|
||||
|
||||
nodejs
|
||||
node-less
|
||||
gettext
|
||||
@@ -547,6 +552,28 @@ fi
|
||||
|
||||
start sutro
|
||||
|
||||
###############################################################################
|
||||
# geoip service
|
||||
###############################################################################
|
||||
if [ ! -f /etc/gunicorn.d/geoip.conf ]; then
|
||||
cat > /etc/gunicorn.d/geoip.conf <<GEOIP
|
||||
CONFIG = {
|
||||
"mode": "wsgi",
|
||||
"working_dir": "$REDDIT_HOME/reddit/scripts",
|
||||
"user": "$REDDIT_USER",
|
||||
"group": "$REDDIT_USER",
|
||||
"args": (
|
||||
"--bind=127.0.0.1:5000",
|
||||
"--workers=1",
|
||||
"--limit-request-line=8190",
|
||||
"geoip_service:application",
|
||||
),
|
||||
}
|
||||
GEOIP
|
||||
fi
|
||||
|
||||
service gunicorn start
|
||||
|
||||
###############################################################################
|
||||
# Job Environment
|
||||
###############################################################################
|
||||
|
||||
@@ -375,6 +375,8 @@ words_file = /usr/dict/words
|
||||
case_sensitive_domains = i.imgur.com, youtube.com
|
||||
# whether to load reddit private code (a hack until we structure it better)
|
||||
import_private = false
|
||||
# location of geoip service
|
||||
geoip_location = 127.0.0.1:5000
|
||||
|
||||
|
||||
############################################ AUTHENTICATION
|
||||
|
||||
110
r2/r2/lib/geoip.py
Normal file
110
r2/r2/lib/geoip.py
Normal file
@@ -0,0 +1,110 @@
|
||||
#!/usr/bin/python
|
||||
# The contents of this file are subject to the Common Public Attribution
|
||||
# License Version 1.0. (the "License"); you may not use this file except in
|
||||
# compliance with the License. You may obtain a copy of the License at
|
||||
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
|
||||
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
|
||||
# software over a computer network and provide for limited attribution for the
|
||||
# Original Developer. In addition, Exhibit A has been modified to be consistent
|
||||
# with Exhibit B.
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
|
||||
# the specific language governing rights and limitations under the License.
|
||||
#
|
||||
# The Original Code is reddit.
|
||||
#
|
||||
# The Original Developer is the Initial Developer. The Initial Developer of
|
||||
# the Original Code is reddit Inc.
|
||||
#
|
||||
# All portions of the code written by reddit are Copyright (c) 2006-2014 reddit
|
||||
# Inc. All Rights Reserved.
|
||||
###############################################################################
|
||||
|
||||
import datetime
|
||||
import httplib
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import urllib2
|
||||
|
||||
from pylons import g
|
||||
|
||||
from r2.lib.cache import sgm
|
||||
from r2.lib.utils import in_chunks, tup
|
||||
|
||||
# If the geoip service has nginx in front of it there is a default limit of 8kb:
|
||||
# http://wiki.nginx.org/NginxHttpCoreModule#large_client_header_buffers
|
||||
# >>> len('GET /geoip/' + '+'.join(['255.255.255.255'] * 500) + ' HTTP/1.1')
|
||||
# 8019
|
||||
MAX_IPS_PER_GROUP = 500
|
||||
|
||||
GEOIP_CACHE_TIME = datetime.timedelta(days=7).total_seconds()
|
||||
|
||||
def _fetch_by_ips(endpoint, ips):
    """Query one endpoint of the GeoIP service and merge the JSON results.

    `endpoint` is the path component of the service to hit ('geoip' or
    'org') and `ips` is an iterable of IP address strings.

    The IPs are sent in groups of at most MAX_IPS_PER_GROUP so that the
    request line stays under the limit described next to that constant.
    A group whose request fails, or whose response is not valid JSON, is
    logged and skipped; the groups that did succeed are still returned.

    Returns a dict built from the decoded JSON objects, or an empty dict
    when `g.geoip_location` is not configured.
    """
    if not hasattr(g, 'geoip_location'):
        g.log.warning("g.geoip_location not set. skipping GeoIP lookup.")
        return {}

    ret = {}
    for batch in in_chunks(ips, MAX_IPS_PER_GROUP):
        ip_string = '+'.join(batch)
        # NOTE: os.path.join is used to build the URL, so this relies on the
        # platform path separator being '/'.
        url = os.path.join(g.geoip_location, endpoint, ip_string)

        try:
            response = urllib2.urlopen(url=url, timeout=3)
            try:
                json_data = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
            g.log.warning("Failed to fetch GeoIP information: %r" % e)
            continue

        try:
            ret.update(json.loads(json_data))
        except ValueError as e:
            g.log.warning("Invalid JSON response for GeoIP lookup: %r" % e)
            continue

    return ret


def _location_by_ips(ips):
    """Look up location records for `ips` via the service's 'geoip' endpoint.

    Returns a dict of the service's results; it is empty if the service is
    not configured or every request failed.
    """
    return _fetch_by_ips('geoip', ips)


def _organization_by_ips(ips):
    """Look up organizations for `ips` via the service's 'org' endpoint.

    Duplicate IPs are collapsed before the request is made. Returns a dict
    of the service's results; it is empty if the service is not configured
    or every request failed.
    """
    return _fetch_by_ips('org', list(set(ips)))
|
||||
|
||||
|
||||
def location_by_ips(ips):
    """Return cached GeoIP location data for one IP or several.

    Lookups go through `sgm` against `g.cache` under the 'location_by_ip'
    prefix, with `_location_by_ips` as the miss function and entries kept
    for GEOIP_CACHE_TIME seconds.

    When `tup` reports that a single value was passed and the lookup
    produced something, only that IP's entry is returned. In every other
    case (several IPs, or an empty result) the whole mapping is returned.
    """
    ips, is_single = tup(ips, ret_is_single=True)
    location_by_ip = sgm(
        g.cache,
        ips,
        miss_fn=_location_by_ips,
        prefix='location_by_ip',
        time=GEOIP_CACHE_TIME,
    )

    if not (is_single and location_by_ip):
        return location_by_ip
    return location_by_ip[ips[0]]
|
||||
|
||||
|
||||
def organization_by_ips(ips):
    """Return cached GeoIP organization data for one IP or several.

    Lookups go through `sgm` against `g.cache` under the
    'organization_by_ip' prefix, with `_organization_by_ips` as the miss
    function and entries kept for GEOIP_CACHE_TIME seconds.

    When `tup` reports that a single value was passed and the lookup
    produced something, only that IP's entry is returned. In every other
    case (several IPs, or an empty result) the whole mapping is returned.
    """
    ips, is_single = tup(ips, ret_is_single=True)
    organization_by_ip = sgm(
        g.cache,
        ips,
        miss_fn=_organization_by_ips,
        prefix='organization_by_ip',
        time=GEOIP_CACHE_TIME,
    )

    if not (is_single and organization_by_ip):
        return organization_by_ip
    return organization_by_ip[ips[0]]
|
||||
98
scripts/geoip_service.py
Normal file
98
scripts/geoip_service.py
Normal file
@@ -0,0 +1,98 @@
|
||||
#!/usr/bin/python
|
||||
# The contents of this file are subject to the Common Public Attribution
|
||||
# License Version 1.0. (the "License"); you may not use this file except in
|
||||
# compliance with the License. You may obtain a copy of the License at
|
||||
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
|
||||
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
|
||||
# software over a computer network and provide for limited attribution for the
|
||||
# Original Developer. In addition, Exhibit A has been modified to be consistent
|
||||
# with Exhibit B.
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
|
||||
# the specific language governing rights and limitations under the License.
|
||||
#
|
||||
# The Original Code is reddit.
|
||||
#
|
||||
# The Original Developer is the Initial Developer. The Initial Developer of
|
||||
# the Original Code is reddit Inc.
|
||||
#
|
||||
# All portions of the code written by reddit are Copyright (c) 2006-2014 reddit
|
||||
# Inc. All Rights Reserved.
|
||||
###############################################################################
|
||||
"""
|
||||
This is a tiny Flask app used for geoip lookups against a maxmind database.
|
||||
|
||||
If you are using this service be sure to set `geoip_location` in your ini file.
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
import GeoIP
|
||||
from flask import Flask, make_response
|
||||
|
||||
application = Flask(__name__)
|
||||
|
||||
# SET THESE PATHS TO YOUR MAXMIND GEOIP LEGACY DATABASES
|
||||
# http://dev.maxmind.com/geoip/legacy/geolite/
|
||||
COUNTRY_DB_PATH = '/usr/share/GeoIP/GeoIP.dat'
|
||||
CITY_DB_PATH = '/usr/share/GeoIP/GeoIPCity.dat'
|
||||
ORG_DB_PATH = '/usr/share/GeoIP/GeoIPOrg.dat'
|
||||
|
||||
|
||||
def _open_database(path):
    """Open the MaxMind database at `path`, or return None if that fails.

    A database that cannot be opened is tolerated on purpose: the request
    handlers check each handle for truthiness and simply return less data
    when a database is unavailable. Only `Exception` is caught so that
    SystemExit and KeyboardInterrupt are not swallowed during startup.
    """
    try:
        return GeoIP.open(path, GeoIP.GEOIP_MEMORY_CACHE)
    except Exception:
        return None


# Country, city and organization database handles (None when unavailable).
gc = _open_database(COUNTRY_DB_PATH)
gi = _open_database(CITY_DB_PATH)
go = _open_database(ORG_DB_PATH)
|
||||
|
||||
|
||||
def json_response(result):
    """Wrap `result` in a 200 response whose body is UTF-8 encoded JSON."""
    # Byte strings inside `result` are treated as ISO-8859-1 when serializing;
    # the serialized text is then sent out as UTF-8.
    serialized = json.dumps(
        result,
        encoding='iso-8859-1',
        ensure_ascii=False,
    )
    body = serialized.encode('utf-8')

    response = make_response(body, 200)
    response.headers['Content-Type'] = 'application/json; charset=utf-8'
    return response
|
||||
|
||||
|
||||
@application.route('/geoip/<ips>')
def get_record(ips):
    """Return location data for each of the '+'-separated addresses in `ips`.

    The city database is preferred; if it is unavailable the country
    database is used and only the country code and name are reported. With
    neither database available the response is an empty JSON object.
    """
    addresses = ips.split('+')

    if gi:
        lookup = gi.record_by_addr
    elif gc:
        def lookup(address):
            return {
                'country_code': gc.country_code_by_addr(address),
                'country_name': gc.country_name_by_addr(address),
            }
    else:
        lookup = None

    result = {}
    if lookup is not None:
        for address in addresses:
            result[address] = lookup(address)

    return json_response(result)
|
||||
|
||||
|
||||
@application.route('/org/<ips>')
def get_organizations(ips):
    """Return the organization for each '+'-separated address in `ips`.

    If the organization database is unavailable the response is an empty
    JSON object.
    """
    addresses = ips.split('+')

    if not go:
        return json_response({})

    organizations = dict(
        (address, go.org_by_addr(address)) for address in addresses
    )
    return json_response(organizations)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
application.run()
|
||||
13
upstart/reddit-job-update_geoip.conf
Normal file
13
upstart/reddit-job-update_geoip.conf
Normal file
@@ -0,0 +1,13 @@
|
||||
description "refresh the geoip databases"
|
||||
|
||||
manual
|
||||
task
|
||||
stop on reddit-stop or runlevel [016]
|
||||
|
||||
nice 10
|
||||
|
||||
script
|
||||
. /etc/default/reddit
|
||||
geoipupdate
|
||||
service gunicorn reload geoip.conf
|
||||
end script
|
||||
Reference in New Issue
Block a user