Updates to the service monitor to allow each app to track database load. Also shows a 'redirecting...' message on the most commonly used ajax'd forms when input is accepted.

This commit is contained in:
KeyserSosa
2009-02-02 14:43:27 -08:00
parent 5494254cac
commit f0583ecb17
5 changed files with 192 additions and 69 deletions

View File

@@ -96,7 +96,7 @@ class ApiController(RedditController):
@validatedForm()
def ajax_login_redirect(self, form, jquery, dest):
jquery.redirect("/login" + query_string(dict(dest=dest)))
form.redirect("/login" + query_string(dict(dest=dest)))
@validate(link = VUrl(['url']),
count = VLimit('limit'))
@@ -192,7 +192,7 @@ class ApiController(RedditController):
if form.has_errors("url", errors.NO_URL, errors.BAD_URL):
pass
elif form.has_errors("url", errors.ALREADY_SUB):
jquery.redirect(url[0].already_submitted_link)
form.redirect(url[0].already_submitted_link)
# check for title, otherwise look it up and return it
elif form.has_errors("title", errors.NO_TITLE):
# try to fetch the title
@@ -256,16 +256,16 @@ class ApiController(RedditController):
c.cname = False
path = l.make_permalink_slow()
c.cname = cname
jquery.redirect(path)
form.redirect(path)
def _login(self, jquery, user, dest='', rem = None):
def _login(self, form, user, dest='', rem = None):
"""
AJAX login handler, used by both login and register to set the
user cookie and send back a redirect.
"""
self.login(user, rem = rem)
dest = dest or request.referer or '/'
jquery.redirect(dest)
form.redirect(dest)
@validatedForm(user = VLogin(['user', 'passwd']),
@@ -276,7 +276,7 @@ class ApiController(RedditController):
if reason and reason[0] == 'redirect':
dest = reason[1]
if not form.has_errors("passwd", errors.WRONG_PASSWORD):
self._login(jquery, user, dest, rem)
self._login(form, user, dest, rem)
@validatedForm(VCaptcha(),
@@ -321,7 +321,7 @@ class ApiController(RedditController):
for sr, sub in reason[1].iteritems():
self._subscribe(sr, sub)
self._login(jquery, user, dest, rem)
self._login(form, user, dest, rem)
@noresponse(VUser(),
@@ -482,7 +482,7 @@ class ApiController(RedditController):
"""
if areyousure1 == areyousure2 == areyousure3 == 'yes':
c.user.delete()
jquery.redirect('/?deleted=true')
form.redirect('/?deleted=true')
else:
form.set_html('.status', _("see? you don't really want to leave"))
@@ -981,7 +981,7 @@ class ApiController(RedditController):
form.parent().set_html('.status', _("saved"))
if redir:
jquery.redirect(redir)
form.redirect(redir)
@noresponse(VModhash(),
VSrCanBan('id'),
@@ -1141,7 +1141,7 @@ class ApiController(RedditController):
password = VPassword(['passwd', 'passwd2']))
def POST_resetpassword(self, form, jquery, user, password):
if errors.BAD_USERNAME in c.errors:
return jquery.redirect('/password')
return form.redirect('/password')
elif (not form.has_errors('passwd', errors.BAD_PASSWORD) and
not form.has_errors('passwd2', errors.BAD_PASSWORD_MATCH) and
user):
@@ -1296,7 +1296,7 @@ class ApiController(RedditController):
l._commit()
l.update_url_cache(old_url)
jquery.redirect('/promote/edit_promo/%s' % to36(l._id))
form.redirect('/promote/edit_promo/%s' % to36(l._id))
else:
l = Link._submit(title, url, c.user, sr, ip, False)
@@ -1312,7 +1312,7 @@ class ApiController(RedditController):
promote_until = promote_until,
disable_comments = disable_comments)
jquery.redirect('/promote/edit_promo/%s' % to36(l._id))
form.redirect('/promote/edit_promo/%s' % to36(l._id))
def GET_link_thumb(self, *a, **kw):
"""

View File

@@ -985,8 +985,16 @@ textarea.gray { color: gray; }
padding-right: 5px; }
.wired img {vertical-align: middle;}
.server-status { width: 300px; }
.server-status table { font-size: xx-small; margin-left: 5px; }
.server-status { width: 300px; }
.server-status table {
font-size: xx-small;
margin-left: 5px;
border-top: #BCBCBC solid 1px;
border-left: #BCBCBC solid 1px;
border-bottom: #E0E0E0 solid 1px;
border-right: #E0E0E0 solid 1px;
margin-bottom: 5px;
}
.server-status td { padding-right: 2px; padding-left: 2px; }
.server-status .bar { height: 5px; background-color: blue; }
.server-status .load0 { background-color: #FFFFFF; }
@@ -995,12 +1003,21 @@ textarea.gray { color: gray; }
.server-status .load3 { background-color: #FFEA71; }
.server-status .load4 { background-color: #FF9191; }
.server-status .load5 { background-color: #FF0000; color: #FFFFFF }
.server-status tr.down > * {
background-color: #C0C0C0;
text-decoration: line-through;
}
.server-status th { font-weight: bold; padding-right: 2px; }
.server-status tr.title-region { cursor: pointer; }
.server-status tr.title-region:hover > td,
.server-status tr.title-region:hover > th { text-decoration: underline; }
.server-status tr.title-region.empty { cursor: default; opacity: 0.7; }
.server-status tr.title-region.empty:hover > td,
.server-status tr.title-region.empty:hover > th { text-decoration: none; }
.server-status .pegged {
background-color: red;
font-weight: bold;

View File

@@ -17,11 +17,23 @@ $.log = function(message) {
};
$.debug = $.log;
$.fn.debug = function() {
    /* Log this selection through $.debug, then return a jQuery wrapper
       around the same elements so the call can sit in a chain. */
    var selection = $(this);
    $.debug(selection);
    return $(this);
};
/* Navigate the browser to dest by assigning it to window.location. */
$.redirect = function(dest) {
window.location = dest;
};
$.fn.redirect = function(dest) {
    /* For forms which "post" by ajax and are answered with a redirect:
       reveal the form's .status element with a "redirecting..." notice,
       then navigate to dest through $.redirect. */
    var forms = $(this).filter("form");
    var status = forms.find(".status");
    status.show().html("redirecting...");
    $.redirect(dest);
    /* the page is on its way out at this point, but return the selection
       anyway for the sake of internal consistency */
    return $(this);
};
/* Reload the current page via window.location.reload.
   NOTE(review): the `true` argument is presumably the "force reload from
   server" flag -- confirm it is honoured by the browsers being targeted. */
$.refresh = function() {
window.location.reload(true);
};

View File

@@ -61,6 +61,24 @@
<span style="color:red">${host.database.connections(300)}</span>
</td>
</tr>
<%
qcount = host.database.query_count \
if hasattr(host.database, "query_count") else None
%>
%if qcount:
<tr class="load${load_level} title-region" id="${host_id}">
<th>
</th>
<th>
query count:
</th>
<td>
<span style="color:green">${qcount()}</span>
&nbsp;/&nbsp;
<span style="color:red">${qcount(300)}</span>
</td>
</tr>
%endif
<tr ${hide_data} class="data load${load_level} machine-${host_id}">
<th>by ip:</th>
<td></td><td></td>
@@ -115,11 +133,11 @@
%for host in thing.hostlogs:
<%
host_id = host.host.replace('.', '-')
s = host.services
load = host.load()
load_level = min(max(int(load+0.5), 0),5)
empty_cls = '' if len(host.services) else 'empty'
%>
<tr class="load${load_level} title-region" id="${host_id}">
<tr class="load${load_level} title-region ${empty_cls}" id="${host_id}">
<th>
${host.host} load: ${load}
</th>
@@ -137,9 +155,11 @@
mem_wid = int(mem_col/25*min(25, int(service.mem())))
cpu_60_wid = int(cpu_col/100*min(100,int(service.cpu(60))))
cpu_300_wid = int(cpu_col/100*min(100,int(service.cpu(300))))
is_down = 'down' if service.pid < 0 else ''
%>
<tr ${hide_data} class="data load${load_level} ${pegged} machine-${host_id}">
<tr ${hide_data}
class="data load${load_level} ${pegged} ${is_down} machine-${host_id}">
<td>
%if g.reddit_host == host.host and g.reddit_pid == service.pid:
<b style="color:orangered; font-size:larger">&raquo;</b>
@@ -165,7 +185,7 @@
else:
age = "%d min" % age
%>
${age}
${age if service.pid > 0 else 'down'}
</td>
</tr>
%endfor

View File

@@ -20,28 +20,87 @@
# CondeNet, Inc. All Rights Reserved.
################################################################################
#!/usr/bin/env python
from pylons import g
import os, re, sys, socket, time, smtplib
import subprocess
from datetime import datetime, timedelta
from r2.lib.wrapped import Wrapped
host = g.reddit_host
default_services = ['newreddit']
def is_db_machine(host):
class AppServiceMonitor(Wrapped):
"""
Given a host name, checks the list of known DB machines to
determine if the host is one of them.
"""
for db in g.databases:
ip = list(g.to_iter(getattr(g, db + "_db")))[1]
name = socket.gethostbyaddr(ip)[0]
if (name == host or ("." in host and name.endswith("." + host)) or
name.startswith(host + ".")):
return True
Master controller class for service monitoring. Can be
initialized at the same time as pylons.g provided g is passed in
as the global_config argument. This class has three purposes:
return False
* Fetches Hostlogger instances from the cache for generating
reports (by calling render() as it is a subclass of wrapped).
* keeping track of which machines are DB machines, allowing db
load to be checked and improving load balancing.
* monitoring the local host's load and storing it in the cache.
"""
def __init__(self, hosts = None, global_conf = None):
    """
    Set up the monitor and work out which monitored hosts serve a
    database.

    hosts is the list of machine hostnames to report on; when it is
    empty or omitted, global_conf.monitored_servers is used.

    global_conf is the app's global config object (aka pylons.g).
    Taking it as an argument allows the monitor to be created before
    the app has finished loading; when omitted, pylons.g is imported
    and used instead.
    """
    if not global_conf:
        from pylons import g
        global_conf = g
    self.global_conf = global_conf
    self._hosts = hosts or global_conf.monitored_servers

    # maps db name -> (first db setting, ip, monitored host name)
    db_info = {}
    for db in global_conf.databases:
        params = list(global_conf.to_iter(getattr(global_conf, db + "_db")))
        dbase, ip = params[:2]
        # reverse-resolve the ip so it can be compared with host names
        name = socket.gethostbyaddr(ip)[0]
        for host in global_conf.monitored_servers:
            if name != host:
                # also accept a match on either side of a domain suffix
                suffix_match = "." in host and name.endswith("." + host)
                if not (suffix_match or name.startswith(host + ".")):
                    continue
            # no break: when several hosts match, the last one is kept
            db_info[db] = (dbase, ip, host)

    self._db_info = db_info
    self.hostlogs = []
    Wrapped.__init__(self)
def database_load(self, db_name):
    """
    Return the load of the monitored host that serves database
    db_name, as reported by self.server_load().

    Returns None when db_name has no entry in self._db_info (i.e. it
    was not matched to a monitored host).
    """
    # `in` replaces the deprecated dict.has_key()
    if db_name not in self._db_info:
        return None
    # the last element of each _db_info tuple is the host name
    return self.server_load(self._db_info[db_name][-1])
def server_load(self, mach_name):
    """
    Return the most recent load sample cached for host mach_name.

    This is an instance method (not a staticmethod) because the cache
    lookup needs self.global_conf.  Returns None when no HostLogger
    is cached for mach_name.
    """
    # look up the requested machine, not some unrelated `host` name
    h = HostLogger.from_cache(mach_name, self.global_conf)
    if not h:
        return None
    return h.load.most_recent()
def __iter__(self):
    """
    Iterate over the entries currently held in self.hostlogs.
    """
    hostlogs = self.hostlogs
    return iter(hostlogs)
def render(self, *a, **kw):
    """
    Reload self.hostlogs from the cache and render via Wrapped.

    One HostLogger is fetched per entry in self._hosts; hosts whose
    cache lookup comes back empty are dropped.  All arguments are
    passed straight through to Wrapped.render.
    """
    conf = self.global_conf
    cached = [HostLogger.from_cache(name, conf) for name in self._hosts]
    # keep only the hosts that actually have something in the cache
    self.hostlogs = [log for log in cached if log]
    return Wrapped.render(self, *a, **kw)
def monitor(self, *a, **kw):
    """
    Run HostLogger.monitor for the local host
    (self.global_conf.reddit_host).

    The HostLogger is taken from the cache when one is stored there
    and created fresh otherwise.  This monitor is handed over as the
    first argument, followed by any arguments given here.
    """
    conf = self.global_conf
    local_host = conf.reddit_host
    logger = HostLogger.from_cache(local_host, conf)
    if not logger:
        logger = HostLogger(local_host, self)
    return logger.monitor(self, *a, **kw)
def is_db_machine(self, host):
    """
    Report whether host is one of the known DB machines, i.e. whether
    it appears as the host name of any entry in self._db_info.
    """
    for _dbase, _ip, db_host in self._db_info.values():
        if host == db_host:
            return True
    return False
class DataLogger(object):
@@ -100,9 +159,11 @@ class Database(object):
self.connections = DataLogger()
self.ip_conn = {}
self.db_conn = {}
self.query_count = DataLogger()
def track(self, conn = 0, ip_conn = {}, db_conn = {}, vacuums = {}):
def track(self, conn = 0, ip_conn = {}, db_conn = {}, vacuums = {},
query_count = None):
#log the number of connections
self.connections.add(conn)
@@ -119,16 +180,23 @@ class Database(object):
# log vacuuming
self.vacuuming = [k for k, v in vacuums.iteritems() if v]
# has a query count
if query_count is not None:
self.query_count.add(query_count)
class HostLogger(object):
cache_key = "machine_datalog_data_"
def __init__(self, host):
@classmethod
def cache(cls, global_conf):
    """
    Return the cache object used to store and fetch HostLogger
    instances: the rendercache attribute of global_conf.
    """
    return global_conf.rendercache
def __init__(self, host, master):
self.host = host
self.load = DataLogger()
self.services = {}
self.database = Database() if is_db_machine(host) else None
self.database = Database() if master.is_db_machine(host) else None
def service_pids(self):
return self.services.keys()
@@ -147,24 +215,25 @@ class HostLogger(object):
else:
self.services[pid].age = int(age / 60)
def set_cache(self):
def set_cache(self, global_conf):
key = self.cache_key + str(self.host)
g.rendercache.set(key, self)
self.cache(global_conf).set(key, self)
@classmethod
def from_cache(cls, host):
def from_cache(cls, host, global_conf):
key = cls.cache_key + str(host)
return g.rendercache.get(key)
return cls.cache(global_conf).get(key)
def clean_dead(self, age = 10):
time = datetime.now()
for pid, s in list(self.services.iteritems()):
t = s.last_update()
if not t or t < time - timedelta(0, age):
if not t or t < time - timedelta(0, age) or pid < 0:
del self.services[pid]
def monitor(self, srvname = None, loop = True, loop_time = 2,
def monitor(self, service_monitor,
srvname = None, loop = True, loop_time = 2,
srv_params = {}, top_params = {}, db_params = {}):
while True:
# (re)populate the service listing
@@ -187,10 +256,8 @@ class HostLogger(object):
self.load.add(float(foo.split(' ')[1].strip(',')))
handle.close()
self.clean_dead()
self.set_cache()
self.set_cache(service_monitor.global_conf)
if loop:
time.sleep(loop_time)
@@ -205,15 +272,6 @@ class HostLogger(object):
yield s[pid]
class AppServiceMonitor(Wrapped):
def __init__(self, hosts = None):
hosts = hosts or g.monitored_servers
self.hostlogs = [HostLogger.from_cache(host) for host in hosts]
self.hostlogs = filter(lambda x: x, self.hostlogs)
def __iter__(self):
return iter(self.hostlogs)
def Alert(restart_list = ['MEM','CPU'],
alert_recipients = ['nerds@reddit.com'],
@@ -223,7 +281,7 @@ def Alert(restart_list = ['MEM','CPU'],
p = re.compile("newreddit(\d+)")
cache_key = 'already_alerted_'
from pylons import g
for host in AppServiceMonitor(g.monitored_servers):
for service in host:
# cpu values
@@ -292,7 +350,7 @@ def run_top(proc_ids = [], name = '', exe = "/usr/bin/top"):
if not os.path.exists(exe):
raise ValueError, "bad executable specified for top"
cmd = [exe, '-b', '-n1'] + ["-p%d" % x for x in proc_ids]
cmd = [exe, '-b', '-n1'] + ["-p%d" % x for x in proc_ids if x > 0]
handle = subprocess.Popen(cmd, stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
@@ -316,19 +374,23 @@ def run_top(proc_ids = [], name = '', exe = "/usr/bin/top"):
def supervise_list(exe = "/usr/local/bin/svstat", path = '/service/'):
handle = os.popen("%s %s*" % (exe, path))
defunct = 0
for line in handle:
line = line.split(' ')
name = line[0]
try:
name, status, blah, pid, time, label = line.split(' ')[:6]
status, blah, pid, time = line[1:5]
name = name[len(path):].strip(':')
if status == 'up':
pid = int(pid.strip(')'))
time = int(time)
else:
pid = -1
time = 0
yield (name, status, pid, time)
raise ValueError, "down process"
except ValueError:
pass
defunct += 1
pid = -defunct
time = 0
yield (name, "down", pid, time)
handle.close()
def check_database(proc = "postgres", check_vacuum = True, user='ri'):
@@ -354,21 +416,33 @@ def check_database(proc = "postgres", check_vacuum = True, user='ri'):
vacuums = {}
if check_vacuum:
vac = ("(echo '\t'; echo 'select * from active;') " +
vac = ("(echo '\\t'; echo 'select * from active;') " +
"| psql -U %(user)s %(db)s | grep -i '| vacuum'")
for db in by_db:
handle = os.popen(vac % dict(user=user, db=db))
vacuums[db] = bool(handle.read())
handle.close()
return dict(conn = total,
ip_conn = by_ip,
db_conn = by_db,
vacuums = vacuums)
res = dict(conn = total, ip_conn = by_ip, db_conn = by_db,
vacuums = vacuums)
if 'query_queue' in by_db:
cmd = ("(echo '\t'; echo 'select count(*) from reddit_query_queue;') "
"| psql -U %(user)s query_queue ")
handle = os.popen(cmd % dict(user = user))
for line in handle:
try:
res['query_count'] = int(line.strip('\n '))
break
except ValueError:
continue
handle.close()
return res
def Run(*a, **kw):
HostLogger(g.reddit_host).monitor(*a, **kw)
from pylons import g
AppServiceMonitor(global_conf = g).monitor(*a, **kw)
def Test(num, load = 1., pid = 0):
services = Services()
@@ -383,4 +457,4 @@ def Test(num, load = 1., pid = 0):
services.set_cache()
if __name__ == '__main__':
Run(sys.argv[1:] if sys.argv[1:] else default_services)
Run(sys.argv[1:] if sys.argv[1:] else ['newreddit'])