From b448c7bb01300cbfb6fe1341265f281a31d5c4fd Mon Sep 17 00:00:00 2001 From: shuffman Date: Tue, 8 Jul 2008 14:53:33 -0700 Subject: [PATCH] crop images into squares before thumbnailing --- r2/r2/lib/scraper.py | 32 +++++++++++++++++++++++++++++++- r2/r2/public/static/noimage.png | Bin 0 -> 1997 bytes 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 r2/r2/public/static/noimage.png diff --git a/r2/r2/lib/scraper.py b/r2/r2/lib/scraper.py index d7690fd97..1ede38480 100644 --- a/r2/r2/lib/scraper.py +++ b/r2/r2/lib/scraper.py @@ -27,7 +27,7 @@ from r2.lib.memoize import memoize from urllib2 import Request, HTTPError, URLError, urlopen from httplib import InvalidURL import urlparse, re, urllib, logging, StringIO, logging -import Image, ImageFile +import Image, ImageFile, math from BeautifulSoup import BeautifulSoup log = g.log @@ -48,6 +48,35 @@ def str_to_image(s): image = Image.open(s) return image +def image_entropy(img): + """calculate the entropy of an image""" + hist = img.histogram() + hist_size = sum(hist) + hist = [float(h) / hist_size for h in hist] + + return -sum([p * math.log(p, 2) for p in hist if p != 0]) + +def square_image(img): + """if the image is taller than it is wide, square it off. determine + which pieces to cut off based on the entropy pieces.""" + x,y = img.size + while y > x: + #slice 10px at a time until square + slice_height = min(y - x, 10) + + bottom = img.crop((0, y - slice_height, x, y)) + top = img.crop((0, 0, x, slice_height)) + + #remove the slice with the least entropy + if image_entropy(bottom) < image_entropy(top): + img = img.crop((0, 0, x, y - slice_height)) + else: + img = img.crop((0, slice_height, x, y)) + + x,y = img.size + + return img + def clean_url(url): """url quotes unicode data out of urls""" s = url @@ -180,6 +209,7 @@ class Scraper: if image_str: image = str_to_image(image_str) try: + image = square_image(image) image.thumbnail(thumbnail_size, Image.ANTIALIAS) except IOError, e: #can't read interlaced PNGs, ignore diff --git a/r2/r2/public/static/noimage.png b/r2/r2/public/static/noimage.png new file mode 100644 index 0000000000000000000000000000000000000000..bf6c74ca2dbe188c32376fe94c4c9530306368eb GIT binary patch literal 1997 zcmV;;2Qv7HP)t|NqB*$itRZx}kJS7o&lofQ-mxfc-Itj3W#)V1#T^>C&m0q3J$fpL4$R zb>kTf2Fmx8mt#$@6KN#x-EHZyr*;UOziK({)`QGnq^-m-_}FVRy6H z!~?-#(CUg#rvn}eMn0eCWw~4~l}bLJZ*z0={|*U58;yqbjbH;3{@L5x3x~tJj7Fn{ zLgD)Q`Wt`*9wI7{Nc4I=++|*Ad@6XPMIw=_t1IWx+FkwpW4(6QYIR28s`|XiweZVS zbTt%M1|&)S6{CsK=0{k6d3gyu6BV6R)oJ!}_3rOtW$dNi>wm0j{N<8sKV{b!PU2WR z<_fdnHvpWTp4MtLJjo5$bUHm39&5X(+!l)03-{9f{k?c;@*~m-Vt^q4V=~^fMC$eX zCzm(oV?Gln(tKVoaag*!J1f=XJ-$lBRuXIB1wz70+g#*zZ5TbaG?AzbHr~2E@g+-EK>TJv=0WSLOO$Ks*A|&Rfs^A&ypl(uCp0GQ>bXEjx z41#BHnv6`i;zJbRG|&8b{K2F_Y48X9ZYzuIa|!)IXi03~1TgT1M{c&ZwlHWW1pOZN zD*nJ&!DSGsxpL2FM5BY=Bh0LLka;iGBik=xlm3|s>HPe>QmIg3Boe?zY%uZLw{J&f z=f0sd8;Va=))I=>XAkD`bl7beWKkvi#@dZR_V@SSzkd%f(Mq%XndE`ur-8ADL{xx> zX;?yeBrphlVtwSCPer9x9`(MhD7jr5RMpZ5kKpGknfzGIWk67|SRizjux49b@jK(O zDYcR+a+a4ca@(s;D6LUzGQlPkb#=W3-a`B|v5OPcg)jndjU|3Nfn;W(hb@BA0XiQ8s zS4hv9a!f@xt+u-45_1rHPy)dK7@I8(xlZcqMl7YahS_f;X~N2@edVsE^mJt{>EJZs z8|HE-O6Vi%PhD%rB0<+gvVlY-2*nLGuk98Mb6@}J?tWmz)2_W{tOv2xP|Q)Px+rnT z8d>mMvfA~m;@Kzi+OC}N&*DxGXW0mk38H^Xb*@i8A(A;*Lgy_V9Y{QLqZ&8byoY2W zd7befh=vSYs{OI|lGJ(%Bpik4l3m?2$tOo5W{OKuo(mFs57h^$y1>4S^_a@qDEv5wVuF4hOL44}GBCDAW8*L~x{l>=EwU(}i^?rB4|T`A>q<>k;#hWk zeC+r8(PNXJ;vz8HT2m+#g28b{Znrg9$BKt;9*+n0Jxj(CdqU!_QRlL`5H>gU-i&(( zvWp>8sqDr^+Ohx-!VQLsg0 z`HqFI;v3P=%WtUM^4XFg4bNz{w?RTuu}ed?IK(}BZ?1BXkGtq!*KR`Lhh%b$mFGNr za}Uj2F2};38I8Dtpm?-;OyW%rge8z9J-T{J(TB%Iy-u^ERXu@~zBlCSwfrj)ue!jB zH`6W!0d{b3P&S(E+hl{tmB<8-{4gQc@!epz)xQ0d+T8N3rK>HaqqW=he?qZ@$K(B) zkW7gQ?cyJiwzx%h*VJV)4YZUdyd-Bp!)P*$?sbO;Afb3VWK306f^9uB(3>9rNV5(4 zI{z%qvz!6W2MP9r9^rK2>fO6{rp%SC0aqZ=2dD7}9$)aG5Y>%B6*a=`#y?ODR_N=U z&Rw-{U_LrJay}&bB8}&8a0LMHm;pwqHJK0!`OE}?p5hCxr958Mcp3uJN@t*I>B_3F z*Kd?PYCvymt=g5R-(?U=7K5-NOZAcnIng_kPIA6uJ**UE)C+DrS~E2eie>Od6H=ONiJ>&9PWWa^x*Wi*B&WIY8n%xKXHlJQDn&Q2f!xw%e@9 zq6i)amtRDeolDkj;+XoxAmT?3xHav@Uep*T$+9SfYS&l@V6Ya9psMO~Rohu1$#PC^ zH%I_@PY#5KyZGs{8oTK!N)H(D0l_RWjOsT%wme7@W)RxL0hoaYn=pl!bM0jLmTW6n zAVzyG`kwokUw##roW>6Bp#&*Pv+T5Ge+7P=GuN*b3xs5%vbxSy&RRDeKqAISMbc*0 fgZ25_e*z2unSn<+HaH?t00000NkvXXu0mjfe2LBN literal 0 HcmV?d00001