Документ взят из кэша поисковой машины. Адрес
оригинального документа
: http://www.stsci.edu/~kgordon/papers/pyadspapers.py
Дата изменения: Thu Jan 21 19:31:34 2016 Дата индексирования: Sun Apr 10 15:25:32 2016 Кодировка: |
')
out_file.write(self.bibinfo['abstract'].encode('utf-8'))
out_file.write('
')
out_file.write('[ADS]')
out_file.write('
')
out_file.write('ADS Citation Query' +
'
\n')
if self.num_cites > 0:
out_file.write('# citations = '+repr(self.num_cites)+'
\n')
out_file.write('citations vs. year [year,citations]
\n')
ckeys = self.num_cites_year.keys()
ckeys.sort()
x = []
y = []
for k in ckeys:
out_file.write('['+k+','+repr(self.num_cites_year[k])+']')
x.append(k)
y.append(self.num_cites_year[k])
out_file.write('\n')
# create the plot of the citations per year
fig = p.figure()
ax = fig.add_subplot(1,1,1)
ind = range(len(y))
ax.bar(ind, y, facecolor='#777777', ecolor='black', align='center')
ax.set_ylabel('Citations')
ax.set_title(self.bibcode,fontstyle='italic')
ax.set_xticks(ind)
ax.set_xticklabels(x)
fig.autofmt_xdate()
cite_plot_file = self.bibcode+'_cites_per_year.png'
p.savefig(cite_plot_file, dpi=50, bbox_inches='tight',
pad_inches=0.25)
p.close()
# insert the plot in the html file
out_file.write('
\n')
out_file.write('
\n')
out_file.write('
\n')
out_file.write('Copyright © 2012\n')
out_file.write('' +
'Karl D. Gordon\n')
out_file.write('All Rights Reserved\n')
out_file.write('
#!/usr/bin/env python
#
# Code created to help me learn python as well as fully automate
# the creation of HTML pages for all my papers and index pages
# with various sorts. This used to be done with IDL code and a
# hand-created flat ASCII "database" file.
#
# The code queries ADS to get the list of bibcodes for one author.
# Then it grabs the information for each paper, generates an HTML
# page for each paper, and a set of index pages with different sorts.
# Plots are even generated (new for the python version).
#
# Use at your own risk. I wrote this for my personal use and to learn
# python. I imagine it will immediately break if you try and use it.
# And that there are much more efficient ways to implement this code.
# But, I'd be happy to hear from you if you find it useful and/or
# improve the code.
#
# Written: 2010-2012 (at least that is my guess)
# Initial version finished: 19 Oct 2012 (Karl D. Gordon: kgordon@stsci.edu,
# in a hotel room in Belgium)
# 2013-2015 : updates to improve functionality and make prettier plots
# Jan 2016 : updates started to be compatible with python 3 and PEP8
#
import urllib
import sys
if sys.version_info >= (3, 0):
from html.parser import HTMLParser
else:
from HTMLParser import HTMLParser
import xml.etree.cElementTree as ET
import math
import pylab as p
from operator import itemgetter, attrgetter
import string
import pickle
#from pytagcloud import create_tag_image, make_tags
#from pytagcloud.lang.counter import get_tag_counts
# Object for a single paper
class Paper:
def __init__(self, bibcode, get_citations):
    """Download the ADS record for one paper and store its metadata.

    The abstract record is requested from
    ``http://adsabs.harvard.edu/abs/<bibcode>&data_type=XML`` and the
    children of its first element are copied into the instance.  When
    ``get_citations`` is non-zero and the record reports at least one
    citation, the per-year citation history is fetched as well.

    Parameters
    ----------
    bibcode : str
        ADS bibcode of the paper.
    get_citations : int or bool
        Any value for which ``get_citations != 0`` requests the
        per-year citation history.

    Attributes set
    --------------
    bibinfo : dict
        Keys 'title', 'abstract', 'journal', 'volume' (default '1'),
        'page', 'lastpage', 'date' (characters 4-7 of the pubdate text)
        and 'authors' (list), each present only if the matching tag was
        found (except 'volume').
    bibcode : str
        Bibcode reported by ADS; falls back to the requested one.
    num_cites : int
        Citation count reported by ADS (0 if absent).
    author_rank : int
        1-based position of the last author whose name starts with
        'Gordon', or 0 if there is none.
    ave_cites_per_year, n_years : float, int
        Mean citations per year and number of years in the history
        (both 0 when no history was fetched or it was empty).
    num_cites_year, num_self_cites, num_self_cites_year
        Only created when the citation history is fetched; the two
        self-citation attributes are initialised but not filled here.

    Raises
    ------
    IndexError
        If the ADS response contains no record element.
    """
    # Function-scope imports so the method works on both Python 2 and 3
    # without depending on which urllib flavour the module imported.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    from contextlib import closing

    self.bibinfo = {'volume': '1'}
    self.num_cites = 0
    self.author_rank = 0
    # Guarantee the attribute exists even if ADS omits the bibcode tag.
    self.bibcode = bibcode

    # get the ADS entry and determine the basic info on this paper
    abs_url = "http://adsabs.harvard.edu/abs/" + bibcode + "&data_type=XML"
    with closing(urlopen(abs_url)) as response:
        data = ET.parse(response)
    # list(element) replaces Element.getchildren(), removed in Python 3.9
    records = list(data.getroot())
    alltags = list(records[0])

    # Tags copied verbatim into bibinfo under their own name.
    plain_fields = ('title', 'abstract', 'journal', 'volume',
                    'page', 'lastpage')

    for curtag in alltags:
        # Drop the '{namespace}' prefix instead of assuming its length.
        subtag = curtag.tag.rpartition('}')[2]
        text = curtag.text
        if subtag == "bibcode":
            self.bibcode = text
        elif subtag in plain_fields:
            self.bibinfo[subtag] = text
        elif subtag == "pubdate":
            self.bibinfo['date'] = text[4:8]
        elif subtag == "author":
            authors = self.bibinfo.setdefault('authors', [])
            authors.append(text)
            # Same test as the old `not string.find(text, 'Gordon')`
            # (i.e. the name starts with 'Gordon'), without the
            # Python 2-only string.find function.
            if text is not None and text.startswith('Gordon'):
                self.author_rank = len(authors)
        elif subtag == "citations":
            self.num_cites = int(text)

    self.ave_cites_per_year = 0.
    self.n_years = 0

    if (get_citations != 0) and (self.num_cites > 0):
        self.num_self_cites = 0
        self.num_cites_year = {}
        self.num_self_cites_year = {}

        # '&' (e.g. in A&A bibcodes) would otherwise be read as a query
        # separator; escape every occurrence, not just the first one.
        escaped_bibcode = bibcode.replace('&', '%26')

        # now get the number of citations per year
        # (no .encode() here: str + bytes is a TypeError on Python 3)
        history_url = ('http://adsabs.harvard.edu/cgi-bin/' +
                       'nph-ref_history?bibcode=' +
                       escaped_bibcode +
                       "&refs=CITATIONS&data_type=XML")
        with closing(urlopen(history_url)) as response:
            data = ET.parse(response)

        for year_entry in list(data.getroot()):
            attrs = year_entry.attrib
            total = int(attrs["total"])
            self.num_cites_year[attrs["year"]] = total
            self.ave_cites_per_year += total
            self.n_years += 1

        # An empty history would otherwise divide by zero.
        if self.n_years > 0:
            self.ave_cites_per_year /= self.n_years
def WriteHtml(self):
# open the file for writing
out_file = open(self.bibcode+'.html', 'w')
out_file.write('\n')
out_file.write('
out_file.write(' PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n')
out_file.write(' "DTD/xhtml1-transitional.dtd">\n')
out_file.write('\n')
out_file.write('
'xml:lang="en" lang="en">\n')
out_file.write('
\n')
out_file.write('
'href="../kgmain.css" />')
out_file.write('
\n')
out_file.write('\n')
out_file.write('
\n')
out_file.write('