Trying to make nsrlupdate work for Python 2. Probably horribly broken right now. DO NOT USE IN PRODUCTION.

This commit is contained in:
Robert J. Hansen
2015-02-10 00:05:34 -05:00
parent ad0198de66
commit 504ec25f73
8 changed files with 59 additions and 133 deletions

13
INSTALL
View File

@@ -15,9 +15,9 @@ Installation instructions:
(Note: OS X lacks support for large ZIP files. You may want to
use p7zip instead on OS X.)
4a. If you're running from git's master branch, run bootstrap.sh.
4b. Run the ./configure script.
4. Run bootstrap.sh followed by the configure script. (Note: you
will need the GNU Autotools installed, specifically automake and
autoconf.)
5. Once you've completed the "make && make install" dance, the
following applications will be installed:
@@ -29,10 +29,15 @@ Installation instructions:
(as explained in the next step) before using nsrlsvr.
6. To create the data file, use nsrlupdate. E.g., if your RDS is
stored at /home/rjh/RDS_247m/NSRFile.txt:
stored at /home/rjh/RDS_247m/NSRLFile.txt:
$ sudo nsrlupdate /home/rjh/RDS_247m/NSRLFile.txt
Warning: *this might take a long time*. As of RDS 2.47m, there
are over 40 million hashes, which produce a hashes.txt file
of 1,333,423,179 bytes (!!). That takes a few minutes to extract
and write to disk. You'll have time for a cup of coffee.
7. You can now start nsrlsvr!
$ nsrlsvr

View File

@@ -1,4 +1,4 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# Makefile.in generated by automake 1.13.4 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
@@ -331,8 +331,8 @@ $(ACLOCAL_M4): $(am__aclocal_m4_deps)
$(am__aclocal_m4_deps):
config.h: stamp-h1
@test -f $@ || rm -f stamp-h1
@test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1
@if test ! -f $@; then rm -f stamp-h1; else :; fi
@if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
@rm -f stamp-h1
@@ -532,16 +532,10 @@ dist-xz: distdir
$(am__post_remove_distdir)
dist-tarZ: distdir
@echo WARNING: "Support for shar distribution archives is" \
"deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
$(am__post_remove_distdir)
dist-shar: distdir
@echo WARNING: "Support for distribution archives compressed with" \
"legacy program 'compress' is deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
$(am__post_remove_distdir)
@@ -583,10 +577,9 @@ distcheck: dist
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
&& am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build \
&& ../configure \
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \
$(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \
--srcdir=.. --prefix="$$dc_install_base" \
&& $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
&& $(MAKE) $(AM_MAKEFLAGS) check \

57
aclocal.m4 vendored
View File

@@ -1,4 +1,4 @@
# generated automatically by aclocal 1.14.1 -*- Autoconf -*-
# generated automatically by aclocal 1.13.4 -*- Autoconf -*-
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
@@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.])
# generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.)
AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.14'
[am__api_version='1.13'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
m4_if([$1], [1.14.1], [],
m4_if([$1], [1.13.4], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
@@ -51,7 +51,7 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.14.1])dnl
[AM_AUTOMAKE_VERSION([1.13.4])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
@@ -418,12 +418,6 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# This macro actually does too much. Some checks are only needed if
# your package does certain things. But this isn't really a big deal.
dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O.
m4_define([AC_PROG_CC],
m4_defn([AC_PROG_CC])
[_AM_PROG_CC_C_O
])
# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
# AM_INIT_AUTOMAKE([OPTIONS])
# -----------------------------------------------
@@ -532,48 +526,6 @@ dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below.
AC_CONFIG_COMMANDS_PRE(dnl
[m4_provide_if([_AM_COMPILER_EXEEXT],
[AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
# POSIX will say in a future version that running "rm -f" with no argument
# is OK; and we want to be able to make that assumption in our Makefile
# recipes. So use an aggressive probe to check that the usage we want is
# actually supported "in the wild" to an acceptable degree.
# See automake bug#10828.
# To make any issue more visible, cause the running configure to be aborted
# by default if the 'rm' program in use doesn't match our expectations; the
# user can still override this though.
if rm -f && rm -fr && rm -rf; then : OK; else
cat >&2 <<'END'
Oops!
Your 'rm' program seems unable to run without file operands specified
on the command line, even when the '-f' option is present. This is contrary
to the behaviour of most rm programs out there, and not conforming with
the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
Please tell bug-automake@gnu.org about your system, including the value
of your $PATH and any error possibly output before this message. This
can help us improve future automake versions.
END
if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
echo 'Configuration will proceed anyway, since you have set the' >&2
echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
echo >&2
else
cat >&2 <<'END'
Aborting the configuration process, to ensure you take notice of the issue.
You can download and install GNU coreutils to get an 'rm' implementation
that behaves properly: <http://www.gnu.org/software/coreutils/>.
If you want to complete the configuration process using your problematic
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
to "yes", and re-run configure.
END
AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
fi
fi
])
dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
@@ -582,6 +534,7 @@ dnl mangled by Autoconf and run in a shell conditional statement.
m4_define([_AC_COMPILER_EXEEXT],
m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
# When config.status generates a header, we must update the stamp-h file.
# This file resides in the same directory as the config header
# that is generated. The stamp files are numbered to have different names.

44
configure vendored
View File

@@ -2558,7 +2558,7 @@ $as_echo "$am_cv_python_pyexecdir" >&6; }
ac_config_headers="$ac_config_headers config.h"
am__api_version='1.14'
am__api_version='1.13'
ac_aux_dir=
for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
@@ -3124,48 +3124,6 @@ am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
# POSIX will say in a future version that running "rm -f" with no argument
# is OK; and we want to be able to make that assumption in our Makefile
# recipes. So use an aggressive probe to check that the usage we want is
# actually supported "in the wild" to an acceptable degree.
# See automake bug#10828.
# To make any issue more visible, cause the running configure to be aborted
# by default if the 'rm' program in use doesn't match our expectations; the
# user can still override this though.
if rm -f && rm -fr && rm -rf; then : OK; else
cat >&2 <<'END'
Oops!
Your 'rm' program seems unable to run without file operands specified
on the command line, even when the '-f' option is present. This is contrary
to the behaviour of most rm programs out there, and not conforming with
the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
Please tell bug-automake@gnu.org about your system, including the value
of your $PATH and any error possibly output before this message. This
can help us improve future automake versions.
END
if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
echo 'Configuration will proceed anyway, since you have set the' >&2
echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
echo >&2
else
cat >&2 <<'END'
Aborting the configuration process, to ensure you take notice of the issue.
You can download and install GNU coreutils to get an 'rm' implementation
that behaves properly: <http://www.gnu.org/software/coreutils/>.
If you want to complete the configuration process using your problematic
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
to "yes", and re-run configure.
END
as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5
fi
fi
# Check whether --enable-silent-rules was given.
if test "${enable_silent_rules+set}" = set; then :
enableval=$enable_silent_rules;

View File

@@ -1,4 +1,4 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# Makefile.in generated by automake 1.13.4 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.

View File

@@ -20,14 +20,17 @@ directory you'll find a file called "NSRLFile.txt"; make note of the path
to it. Then call nsrlupdate, passing the path to NSRLFile.txt as an
argument, and be prepared to wait for a while.
nsrlupdate requires a lot of memory. Try to avoid running it on systems
with under 8Gb RAM, as painful swapping will almost certainly occur.
nsrlupdate doesn't require much memory, but it may take a lot of time.
As of RDS 2.47m, there are over 40 million hashes to extract. The final
hashes.txt file will be around 1.3 GB in size. Expect this to keep growing
as subsequent RDSes are released.
nsrlupdate will wipe out the current contents of the hash database, so
be careful if you've appended your own custom dataset.
.Sh BUGS
None known.
nsrlupdate is a Frankenstein's monster of Python 2 and Python 3 support.
The good news is, it's a fairly small script.
.Sh SEE ALSO
nsrlsvr(1)
.Sh AUTHOR

View File

@@ -1,4 +1,4 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# Makefile.in generated by automake 1.13.4 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.

View File

@@ -1,32 +1,46 @@
#!/usr/bin/env python
#coding=UTF-8
from __future__ import print_function
from __future__ import print_function, unicode_literals
from re import compile
from os import sep
from os.path import exists
from sys import argv
from sys import argv, version_info
def process_rds(filename):
(major, minor, micro, release, serial) = version_info
def open_fh(fn):
if major == 2:
return open(fn, "r")
else:
return open(fn, "r", errors="replace", encoding="ISO-8859-1")
def unicodify(line):
if major == 2:
return unicode(line, errors="replace", encoding="ISO-8859-1")
else:
return line
try:
hashes = []
with open(filename, "r", encoding="ISO-8859-1") as fh:
md5re = compile(r'^"?([A-F0-9]{32})"?$')
line = fh.readline()
while line:
cols = line.split(",")
if len(cols) > 2:
match = md5re.match(cols[1])
if match:
hashes.append(match.group(1))
line = fh.readline()
hashes.sort()
with open(sep.join(["@pkgdatadir@", "hashes.txt"]), "w") as out:
[print(X, file=out) for X in hashes]
with open_fh(filename) as fh:
with open(sep.join(["@pkgdatadir@", "hashes.txt"]), "w") as out:
md5re = compile(r'^"?([A-F0-9]{32})"?$')
line = unicodify(fh.readline())
while line:
cols = line.split(",")
if len(cols) > 2:
match = md5re.match(cols[1])
if match:
md5 = match.group(1) + "\n"
out.write(md5.encode("ASCII"))
line = unicodify(fh.readline())
except IOError as e:
print("I/O error while trying to process " + filename)
print("(Does your disk have enough free space?)")
print(
"""(This is normally caused by one of two things: either you're out of disk
space, or you lack privileges to write to @pkgdatadir@/hashes.txt file.)""")
if __name__=='__main__':
if len(argv) != 2 or not exists(argv[1]):