Документ взят из кэша поисковой машины. Адрес оригинального документа : http://theory.sinp.msu.ru/pipermail/ru-ngi/attachments/20120608/ff3b8c3f/attachment-0001.py
Дата изменения: Fri Jun 8 19:18:50 2012
Дата индексирования: Tue Oct 2 10:15:56 2012
Кодировка:
#!/bin/env python
# Eygene Ryabinkin, rea@grid.kiae.ru. Copyright 2009, 2011.
#
# Simple program to walk DPNS database and perform the selected
# mappings inside the given stream of space-separated fields.
#
# Usage: $0 {sfn2turl | fileid2sfn} [field-id]
# field-id is the one-based number of the field that holds the
# item (SFN or fileid) to be translated; it defaults to 1.
# Utility will perform the translation and replace the
# translated items, leaving all other fields untouched.
# Fields will be joined by single-space on output.

import os
import sys
import MySQLdb
import MySQLdb.cursors

srm_pfx = "srm://se.grid.kiae.ru/"
metadata_tbl = "Cns_file_metadata"
replica_tbl = "Cns_file_replica"
dpminfo = '/usr/etc/DPMINFO'
join_string = " "


def usage():
    """
    Prints the one-line command synopsis to stderr.

    The program name is taken from sys.argv[0].

    """
    synopsis = "Usage: %s {sfn2turl | fileid2sfn} [field-number]\n"
    program = sys.argv[0]
    sys.stderr.write(synopsis % program)


class MappingError(Exception):
    """
    Exception type raised by the mapping drivers.

    The object passed to the constructor is kept in the 'value'
    attribute; the string form of the exception is repr() of it.

    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        # "%r" formatting of a one-element tuple yields repr(value).
        return "%r" % (self.value,)


class Sfn2Turl:
    """
    Mapping driver for SFN -> TURL translation.

    Storage file name is the identification of the file in the
    site's file system (DPM, dCache).

    TURL is the transport URL or the physical location of the
    file within the pool servers.
    """

    def __init__(self, dbc, srm_pfx):
        """
        Parameters:
        dbc -- database cursor used for all queries.  Rows are read
               by column name, so it must return mapping-like rows.
        srm_pfx -- prefix of the SRM SFNs that belong to our namespace.

        """
        # This is the cache for the path IDs to avoid the swarm
        # of DB queries.  Keys are slash-joined paths without the
        # leading slash; the root node lives under the key "/".
        self.path_cache = {}
        # Database connection
        self.dbc = dbc
        # Prefix of our SRM SFN's
        self.srm_pfx = srm_pfx

    def map(self, sfn):
        """
        Maps the given SFN to the TURL.

        Parameters:
        sfn -- either an absolute path (starts with "/") or a full
               SFN that starts with our SRM prefix.

        Returns the TURL as a string.

        Raises MappingError if the SFN is outside of our namespace
        (this includes the empty string) or if any of the database
        lookups finds nothing.

        """
        debug("Will process SFN = '%s'" % sfn)
        # startswith() is used instead of sfn[0] so that an empty
        # string is reported as a mapping error, not as IndexError.
        if not sfn.startswith("/"):
            if not sfn.startswith(self.srm_pfx):
                raise MappingError("Not within our namespace '%s'" % self.srm_pfx)
            sfn = sfn[len(self.srm_pfx):]

        file_id = self._map_sfn_to_id(sfn)
        debug("Got SFN's id = '%s'" % file_id)
        turl = self._map_id_to_turl(file_id)
        debug("The corrresponding TURL is '%s'" % turl)

        return turl

    def _root_id(self):
        """
        Returns fileid of the root node, querying the DB only once.

        Raises MappingError if the root node isn't found.

        """
        if "/" in self.path_cache:
            return self.path_cache["/"]

        self.dbc.execute("select fileid from " +
            metadata_tbl + " where name = '/' and parent_fileid = 0")
        result = self.dbc.fetchone()
        if result is None:
            raise MappingError("Can't determine fileid for root node")
        self.path_cache["/"] = result["fileid"]
        return self.path_cache["/"]

    def _map_sfn_to_id(self, path):
        """
        Returns ID of the filesystem object using cache and DB lookups.

        Parameters:
        path -- path whose ID is to be determined.

        Raises MappingError if the root node or any of the path
        components can't be found.

        """
        # Empty components (leading, trailing or doubled slashes)
        # are dropped: they would turn into lookups for the name ''.
        comps = [comp for comp in path.split("/") if comp]
        debug("Path components are %s" % comps)
        path = "/".join(comps)

        if path in self.path_cache:
            return self.path_cache[path]

        # Determine root ID
        root_id = self._root_id()
        if not comps:
            # The path designates the root node itself.
            return root_id

        # Find the longest leading part of the path that is cached.
        n = len(comps)
        while n > 0 and "/".join(comps[:n]) not in self.path_cache:
            n -= 1

        if n > 0:
            file_id = self.path_cache["/".join(comps[:n])]
        else:
            # Searching from root.
            file_id = root_id

        # Walk the remaining components, caching every level.
        # 'i' is the one-based index of the component being resolved.
        for i in range(n + 1, len(comps) + 1):
            ipath = "/".join(comps[:i])
            last = comps[i - 1]
            debug("At idx %d: looking for '%s' at parent '%s'" % (i, last, file_id))
            self.dbc.execute("select fileid from " +
                metadata_tbl + " where name = %s and parent_fileid = %s",
                (last, file_id))
            result = self.dbc.fetchone()
            if result is None:
                raise MappingError("Error looking for '%s' in parent_fileid = %s" % \
                    (ipath, file_id))
            file_id = result["fileid"]
            self.path_cache[ipath] = file_id

        return file_id

    def _map_id_to_turl(self, id):
        """
        Returns the 'sfn' column of the first replica row found
        for the given fileid, converted to string.

        Raises MappingError if there is no such row.

        """
        self.dbc.execute("select sfn from " + replica_tbl +
            " where fileid = %s", (id, ))
        result = self.dbc.fetchone()
        if result is None:
            raise MappingError("Can't map fileid '%s' to SFN" % id)
        return str(result["sfn"])



class Fileid2Sfn:
    """
    Mapping driver for DPNS fileid -> SFN translation.

    Storage file name is the identification of the file in the
    site's file system (DPM, dCache).
    """

    def __init__(self, dbc, srm_pfx):
        """
        Parameters:
        dbc -- database cursor used for all queries.  Rows are read
               by column name, so it must return mapping-like rows.
        srm_pfx -- prefix that is prepended to the produced paths.

        """
        # This is the cache for the fileid paths
        # to avoid the swarm of DB queries.  Keys are str(fileid),
        # values are the lists of path components of that fileid.
        self.fileid_cache = {}
        # Database connection
        self.dbc = dbc
        # Prefix of our SRM SFN's
        self.srm_pfx = srm_pfx

    def map(self, fileid):
        """
        Maps the given fileid to the SFN.

        Returns the SRM prefix followed by the slash-joined path
        components of the file.

        Raises MappingError if the fileid or any of its ancestors
        can't be found in the database.

        """
        debug("Will process fileid = '%s'" % fileid)

        path_list = self._map_fileid_to_sfn(fileid, [])
        debug("Got path list = '%s'" % str(path_list))

        return self.srm_pfx + "/".join(path_list)

    def _map_fileid_to_sfn(self, fileid, path_list):
        """
        Returns the list of path names that correspond
        to the given fileid, followed by the items of path_list.

        We are recursive, because we also want to fill
        the fileid cache.

        Parameters:
        fileid -- fileid whose path is to be determined.
        path_list -- existing path components that live below
                     the fileid.  Ought to be the empty list for
                     the first invocation.

        The returned list is always a new object, path_list itself
        is not modified.

        Raises MappingError if the fileid isn't found.

        """
        # File IDs arrive as strings from the input stream and as
        # numbers from the database: use one key form for both.
        key = str(fileid)
        own_path = self.fileid_cache.get(key)

        if own_path is None:
            debug("Working with fileid '%s'" % (fileid))
            self.dbc.execute("select name, parent_fileid from " +
                metadata_tbl + " where fileid = %s", (fileid, ))
            result = self.dbc.fetchone()
            if result is None:
                raise MappingError("Error looking for" + \
                    " parent of fileid '%s'" % (fileid))
            name = result["name"]
            debug("Fileid '%s' is named '%s'" % (fileid, name))
            if name != "/":
                own_path = self._map_fileid_to_sfn(result["parent_fileid"],
                    [name])
            else:
                # Root node contributes no components and ends the walk.
                own_path = []
            # Only the components of this very fileid are cached;
            # components of the objects below it must never get here,
            # or siblings would all be mapped to the same path.
            self.fileid_cache[key] = own_path
            debug("Path list for fileid '%s': %s" % (fileid, own_path))

        # Concatenation builds a new list, so the cached one stays intact.
        return own_path + list(path_list)


def debug(message):
    """
    Writes the message and a newline to stderr, but only when
    the environment variable DEBUG is present (any value,
    including the empty one, enables the output).

    """
    if "DEBUG" not in os.environ:
        return
    sys.stderr.write(message + "\n")

def err(message):
    """
    Writes the message, terminated by a newline, to stderr.

    """
    line = message + "\n"
    stream = sys.stderr
    stream.write(line)


def errx(exit_code, message):
    """
    Dumps the message to stderr and terminates the execution
    with the given exit code.

    Parameters:
    exit_code -- value handed to sys.exit().
    message -- text to print; a newline is appended.

    Raises SystemExit, never returns.

    """
    # Diagnostics must go to stderr: stdout carries the translated
    # data stream and must not be mixed with error messages.
    sys.stderr.write(message + "\n")
    sys.exit(exit_code)


if __name__ == "__main__":
    # Script entry point: picks the mapping driver named by the first
    # argument, connects to the database described in the DPMINFO file
    # and translates one field of every line read from stdin.
    drivers = {
        'sfn2turl': Sfn2Turl,
        'fileid2sfn': Fileid2Sfn,
    }

    if len(sys.argv) < 2:
        usage()
        sys.exit(1)

    # The field number is one-based on the command line and
    # defaults to the first field.
    if len(sys.argv) < 3:
        field_no = 1
    else:
        try:
            field_no = int(sys.argv[2])
        except ValueError:
            errx(1, "Bad field number '%s'.\n" % sys.argv[2])
    if field_no < 1:
        errx(1, "Field number must be at least 1.")
    # Zero-based from now on.
    field_no = field_no - 1

    if sys.argv[1] not in drivers:
        errx(1, "Bad mapping name '%s'. Valid names: %s" % \
            (sys.argv[1], sorted(drivers.keys())))

    # Exceptions are fetched through sys.exc_info(), this form is
    # accepted by both old and new Python versions.
    try:
        info_fd = open(dpminfo, "r")
    except EnvironmentError:
        errx(1, "Unable to open '%s': %s.\n" % (dpminfo, sys.exc_info()[1]))
    try:
        try:
            info_line = info_fd.readline().rstrip()
        finally:
            info_fd.close()
        # The first line is parsed as user/password@host/database.
        # User ends at the first slash, database starts after the
        # last one and host after the last "@", so the password
        # itself may contain both "/" and "@".
        (user, rest) = info_line.split("/", 1)
        (pw_host, db_name) = rest.rsplit("/", 1)
        (pw, host) = pw_host.rsplit("@", 1)
    except (ValueError, EnvironmentError):
        errx(1, "Can't determine MySQL parameters from '%s': %s.\n" % \
            (dpminfo, sys.exc_info()[1]))

    # DictCursor returns rows that are indexed by the column name.
    conn = MySQLdb.connect(host=host, user=user, passwd=pw, db=db_name, \
        cursorclass = MySQLdb.cursors.DictCursor)
    try:
        c = conn.cursor()
        try:
            map_driver = drivers[sys.argv[1]](c, srm_pfx)

            for line in sys.stdin:
                line_fields = line.split()
                debug("Got '%s'." % join_string.join(line_fields))
                if field_no >= len(line_fields):
                    # Lines that are too short are passed through.
                    debug("Field isn't found, just dumping.")
                    sys.stdout.write(join_string.join(line_fields) + "\n")
                    continue
                subject = line_fields[field_no]
                try:
                    mapped = map_driver.map(subject)
                except MappingError:
                    # Lines that can't be mapped are reported on
                    # stderr and produce no output line.
                    err("Can't find mapping for '%s': %s." % \
                        (subject, sys.exc_info()[1]))
                    continue

                line_fields[field_no] = mapped
                sys.stdout.write(join_string.join(line_fields) + "\n")
        finally:
            c.close()
    finally:
        # Release the connection even if the processing has failed.
        conn.close()

    sys.exit(0)