BibDesk is a fantastic tool to organize your library of scientific papers. It can also be used to organize your own publications. On my webpage I prefer to list my publications in a special order, namely organized by the categories journal papers and book chapters, conference papers, and everything else. Ideally, each record would also link to an attached PDF file and all I would have to do is keep my BibDesk database organized and my online list of publications would be kept up-to-date automatically.
Unfortunately, generating publication listings that are organized by category and link to attached PDF files doesn't seem to be straightforward with BibDesk's own export mechanism. So here is what I came up with myself: a Perl script that I can call from within Emacs to automatically insert the list of publications where I need it.
#!/usr/bin/perl -w
# Copyright (C) 2009 by Bjoern Rueffer, Time-stamp: <2009-11-05 00:59:33 bjoern>
# This program TAKES A BIBDESK FILE (essentially a bibtex file) AND
# GENERATES A HTML PUBLICATION LIST FROM IT. This list is formatted
# using CSS tags and its intended use is to be inserted into my
# homepage (a single html file as of the time of this writing). This
# program can be called from within emacs and the actual insertion
# can be performed automatically, e.g., by using a small emacs lisp
# function embedded into the html-file. Something like this:
# <!--
# (let ((beg (progn (search-forward "startinsert")
# (forward-line 1)
# (point))))
# (search-forward "/endinsert")
# (beginning-of-line)
# (delete-region beg (point))
# (shell-command "~/path/to/this/file.pl" 1 "perl output to STDERR")
# ) type C-x C-e after the closing brace to update publication listing
# -->
# <!--startinsert-->
# <!--/endinsert-->
#
# Admittedly, this might not be the most elegant way to do it, but it
# is pretty effective.
# This program REQUIRES Text::BibTeX version >= 0.34 from
# http://starship.python.net/~gward/btOOL/ for reading the bibtex file
# and formatting author names etc. A very handy tool!
# All CONFIGURATION is currently hard-coded into this file; A bibtex
# file "$bibdeskfile" is read and the generated HTML output is pasted
# into STDOUT. File attachments (via the local-url-fields and assumed
# to be PDFs) will be copied to a directory
# "$attachmentdirabsolute". During the process, lists of authors will
# be condensed, by removing any author whose name matches /R.*ffer/ --
# you might want to adapt that for your purposes.
# Use this software for whatever you want at your own risk, but don't
# blame me for anything. If you want to report any improvements you've
# made back to me, please do so to the email address to be found at
# http://bjoern.rueffer.info. I'd appreciate that!
use warnings;
use strict;
use Text::BibTeX qw(:nameparts :joinmethods);
use Text::BibTeX::Name;
use Text::BibTeX::NameFormat;
### setup
# All configuration is hard-coded in this section: where attachments are
# linked from, where they are copied to, which BibTeX file is read, and how
# person names are formatted.
my $attachmentdirrelative = "attachments"; # relative directory name: where to link to for attached files
my $attachmentdirabsolute = "/path/to/Homepage/attachments"; # absolute directory name: where to put attached files

# Make sure the attachment directory exists. A failure is only reported, not
# fatal: the listing itself can still be generated without attachments.
unless (-d $attachmentdirabsolute) {
    mkdir $attachmentdirabsolute
        or warn "Cannot create attachment directory '$attachmentdirabsolute': $!\n";
}

# Remove PDFs left over from a previous run. This is done with opendir/unlink
# instead of shelling out to "rm", so that directory names containing spaces
# or shell metacharacters cannot break (or be misinterpreted by) a command line.
if (opendir(my $attachmentdirhandle, $attachmentdirabsolute)) {
    # same selection as the shell pattern "*.pdf": no hidden files
    my @oldpdfs = grep { !/\A\./ && /\.pdf\z/ } readdir $attachmentdirhandle;
    closedir $attachmentdirhandle;
    foreach my $oldpdf (@oldpdfs) {
        unlink "$attachmentdirabsolute/$oldpdf"
            or warn "Cannot remove '$attachmentdirabsolute/$oldpdf': $!\n";
    }
} else {
    warn "Cannot read attachment directory '$attachmentdirabsolute': $!\n";
}

# which bibdesk file to load? (the leading "<" opens it for reading)
my $bibdeskfile = Text::BibTeX::File->new("</path/to/Publications/mypublications.bib")
    or die "Cannot open the BibTeX file: $!\n";

# name formatting rules, see "man Text::BibTeX::Name"
my $nameformat = Text::BibTeX::NameFormat->new("fvlj", 1);
# Strip the TeX markup that occurs in my BibTeX records from a string, so
# that the result can be pasted into HTML. It takes care of everything that
# was needed in my case, not of TeX in general.
#
# Argument: the string to clean (may be undefined).
# Returns:  the cleaned copy; the empty string for an undefined argument.
#
# The work is done on a lexical copy, so the caller's $_ is left untouched.
sub texcleanedstring {
    my $text = shift;
    return '' unless defined $text;

    # remove \verb|...|; non-greedy, so that two \verb spans in one string
    # are unwrapped separately instead of being merged into one
    $text =~ s/\\verb\|(.+?)\|/$1/g;
    $text =~ s/\\em //g;      # remove formatting
    $text =~ s/\\ |~/ /g;     # control space and tie become plain spaces
    # a/o/u umlauts into HTML entities; accepts {\"u}, \"{u} and \"u
    # (left-over braces are removed by the next step)
    $text =~ s/\\"\{?([auoAUO])\}?/&$1uml;/g;
    $text =~ s/\{|\}//g;      # remove curly braces
    $text =~ s/\$//g;         # remove dollar signs
    $text =~ s/--/–/g;        # correct en dashes
    return $text;
}
# Reformat a BibTeX author list into a co-author remark of the form
# "(with X, Y and Z)", leaving yourself out.
#
# Argument: the raw author field, names separated by "and".
# Returns:  "(with ...)" naming every author whose raw name does not match
#           /R.*ffer/i, each formatted through $nameformat; the empty string
#           if no such author is left or the field is empty/undefined.
#
# The co-authors are filtered first and joined afterwards, so the separators
# come out right no matter how many names were filtered out (none, one, all).
sub formatauthorstring {
    my $authorfield = shift;
    return "" unless defined $authorfield && $authorfield =~ /\S/;

    my @coauthors =
        map  { Text::BibTeX::Name->new($_)->format($nameformat) } # this takes care of the formatting
        grep { !/R.*ffer/i }                                      # don't mention yourself
        split /\s+and\s+/, $authorfield;
    return "" unless @coauthors;

    # put authors into a "x, y and z" format
    my $lastcoauthor = pop @coauthors;
    my $list = @coauthors
        ? join(", ", @coauthors) . " and " . $lastcoauthor
        : $lastcoauthor;
    return "(with $list)";
}
# Similar to formatauthorstring, but for editor lists: produces a plain
# "X, Y and Z" without the surrounding "(with ...)".
#
# Argument: the raw editor field, names separated by "and".
# Returns:  the names of all editors whose raw name does not match
#           /R.*ffer/i, each formatted through $nameformat and joined with
#           ", " and a final " and "; the empty string if none is left or
#           the field is empty/undefined.
#
# Filtering happens before joining, so skipping a name can no longer leave a
# dangling ", " or " and " at the end of the result.
sub formateditorstring {
    my $editorfield = shift;
    return "" unless defined $editorfield && $editorfield =~ /\S/;

    my @editors =
        map  { Text::BibTeX::Name->new($_)->format($nameformat) }
        grep { !/R.*ffer/i }
        split /\s+and\s+/, $editorfield;
    return "" unless @editors;

    my $lasteditor = pop @editors;
    return @editors
        ? join(", ", @editors) . " and " . $lasteditor
        : $lasteditor;
}
my %jpapers = (); # journal papers and book chapters
my %cpapers = (); # conference papers
my %miscpapers = (); # theses and reports
my $publicationcounter = 0; # number of publication records read; also names the copied PDF files

# Main loop: read the BibTeX file record by record, render each publication
# as an HTML fragment and file it under its category. The hash keys are only
# sort keys (year, then title, then citation key); the values are the HTML.
while (my $entry = Text::BibTeX::Entry->new($bibdeskfile)) # for each publication record in the bibtex file...
{
    next unless $entry->parse_ok; # yeah, we might want to not consider everything, like @string{} and similar entries
    my $type = $entry->type;
    next unless $type =~ /article|inproceedings|incollection|thesis|report/; # or @unpublished entries for that matter
    $publicationcounter += 1;

    my $html = ""; # the HTML for this record, still containing TeX markup

    # Returns "$before<field value>$after" if the field exists, "" otherwise.
    my $field = sub {
        my ($name, $before, $after) = @_;
        return $entry->exists($name) ? $before . $entry->get($name) . $after : "";
    };

    # some of my publication entries have a special key "publish-pdf"
    # to indicate whether an attached file should be made public or
    # not (this is a boolean field in BibDesk, very convenient to
    # handle). I'm assuming here that all attached files are of PDF
    # type.
    my $publishpdf = $entry->exists('publish-pdf') ? $entry->get('publish-pdf') : '';
    if ($publishpdf =~ m/\b(?:yes|true|1)\b/i
        && $entry->exists('local-url')
        && $type !~ /thesis|report/) {
        # create a copy of the attached (PDF!) file with a simplified filename
        my $lurl = $entry->get('local-url'); # this field contains a link to (one of the) attached PDF files, see comment below
        my $pdfcopy = "$attachmentdirabsolute/$publicationcounter.pdf";
        # list-form system() bypasses the shell, so quotes, spaces, dollar
        # signs or backticks in the path need no escaping
        if (system("cp", $lurl, $pdfcopy) == 0) {
            $html .= "<div class=\"pdf\"><a href=\"$attachmentdirrelative/$publicationcounter.pdf\" class=\"pdf\"><img class=\"pdf\" src=\"Oficina-PDF-128x128.png\"></a></div>\n";
        } else {
            # no link without a file: better no PDF icon than a dead link
            warn "Could not copy '$lurl' to '$pdfcopy'; leaving out the PDF link.\n";
        }
    }
    # to actually generate these "local-url" fields, I've used an
    # applescript within BibDesk... see other blog post on that

    # the next few if-clauses take care of the formatting of each
    # individual publication record, as it appears on my online
    # publication listing. Obviously journal and conference papers
    # have to be treated differently. To some degree we are doing
    # BibTeX's job here, but by doing it "manually", we can actually
    # insert css-tags so that we can change the looks later on.
    if ($type =~ /thesis/) {
        $html .= $field->('title', "<span class=\"pubtitle\">", ".</span>\n ");
        $html .= "<br />";
        if ($type =~ /mastersthesis/) {
            $html .= "Masters thesis, ";
        } elsif ($type =~ /phdthesis/) {
            $html .= "PhD thesis, ";
        }
        $html .= $field->('school', "<i>", "</i>, ");
        $html .= $field->('month', "", ",\n ");
        $html .= $field->('year', "", "\n");
    }
    if ($type =~ /report/) {
        $html .= $field->('title', "<span class=\"pubtitle\">", ".</span>\n ");
        $html .= formatauthorstring($entry->get('author')) . " " if $entry->exists('author');
        $html .= "<br />Technical report, ";
        $html .= $field->('institution', "<i>", "</i>, ");
        $html .= $field->('month', "", ",\n ");
        $html .= $field->('year', "", "\n");
    }
    if ($type eq 'article') {
        # an alternative would be to list the authors first, followed by
        # ": " and the title; whichever way is chosen, be consistent!
        $html .= $field->('title', "<span class=\"pubtitle\">", ".</span>\n ");
        $html .= formatauthorstring($entry->get('author')) . " " if $entry->exists('author');
        $html .= $field->('journal', "<br /><i>", "</i>");
        my $hasvolume = $entry->exists('volume');
        my $hasnumber = $entry->exists('number');
        $html .= $hasvolume ? " <b>" . $entry->get('volume') . "</b>" : ", ";
        $html .= "(" . $entry->get('number') . ")" if $hasnumber;
        if ($entry->exists('pages')) {
            # "volume(number):pages" when there is something in front of the
            # colon, "pp. pages" otherwise (instead of a dangling ", :pages")
            $html .= (($hasvolume || $hasnumber) ? ":" : "pp. ") . $entry->get('pages') . ",\n ";
        } elsif ($hasvolume || $hasnumber) {
            $html .= ", "; # keep volume/number and year apart when there are no pages
        }
        $html .= $field->('year', "", "\n");
    }
    if ($type eq 'incollection') {
        $html .= $field->('title', "<span class=\"pubtitle\">", ".</span>\n ");
        $html .= formatauthorstring($entry->get('author')) . " " if $entry->exists('author');
        $html .= "<br />In: ";
        $html .= "<i>" . formateditorstring($entry->get('editor')) . " (Eds.)</i>:\n " if $entry->exists('editor');
        $html .= $field->('booktitle', "<i>", "</i>,\n ");
        $html .= $field->('pages', "pp. ", ",\n ");
        $html .= $field->('publisher', "", ", ");
        $html .= $field->('address', "", ", ");
        $html .= $field->('year', "", "\n");
    }
    if ($type eq 'inproceedings') {
        $html .= $field->('title', "<span class=\"pubtitle\">", ".</span>\n ");
        $html .= formatauthorstring($entry->get('author')) . " " if $entry->exists('author');
        $html .= $field->('booktitle', "<br />In: <i>", "</i>,\n ");
        $html .= $field->('address', "", ",\n ");
        $html .= $field->('pages', "pp. ", ",\n ");
        $html .= $field->('month', "", ",\n ");
        $html .= $field->('year', "", "\n");
    }
    if ($entry->exists('note')) { # the note field contains "to appear", "submitted June 2009" etc. it might also contain a DOI as text
        my $bibnote = $entry->get('note');
        $bibnote =~ s{(https?://[-a-zA-Z/.:0-9]+)}{<a href="$1">[external resource]</a>}g; # activate hyperlinks (http and https)
        $bibnote =~ s{DOI:([\w\d./-]+)}{\n<span class="pubDOI">DOI:<a class="pubDOI" href="http://dx.doi.org/$1">$1</a></span>\n}; # activate silent DOIs
        $html .= "<span class=\"pubNOTE\">" . $bibnote . "</span>\n";
    }

    # Everything collected so far may contain TeX markup and is cleaned; the
    # fields appended below are added verbatim after the cleaning step.
    my $publicationentry = texcleanedstring($html);
    if ($entry->exists('doi')) { # some publications do have a doi-field, and this is how it gets formatted
        my $doi = $entry->get('doi');
        $publicationentry .= "\n<span class=\"pubDOI\">DOI:<a class=\"pubDOI\" href=\"http://dx.doi.org/$doi\">$doi</a></span>\n";
    }
    if ($entry->exists('online-information')) { # need that for Springer/Positivity disclaimer "The original publication is available at www.springerlink.com."
        $publicationentry .= "\n<br /><span class=\"pubONLINE-INFORMATION\">" . $entry->get('online-information') . "</span>\n";
    }

    if ($entry->exists('year')) { # now we generate hashes, to be reverse sorted by year and publication title
        my $tag = $entry->get('year');
        $tag .= texcleanedstring($entry->get('title')) if $entry->exists('title');
        # The citation key makes the tag unique, so two records with the same
        # year and title no longer overwrite each other. The "\0" in front of
        # it sorts below every printable character and therefore leaves the
        # year/title ordering as it was.
        my $citekey = $entry->key;
        $tag .= "\0" . $citekey if defined $citekey;
        # finally decide to which category the publication entry we have
        # been working on belongs to and file it
        if ($type =~ /article|incollection/) {
            $jpapers{$tag} = $publicationentry;
        } elsif ($type eq 'inproceedings') {
            $cpapers{$tag} = $publicationentry;
        } elsif ($type =~ /thesis|report/) {
            $miscpapers{$tag} = $publicationentry;
        }
    } else {
        warn "ERROR: Found an entry without a year. You'd want to correct that in the bibtex file! Meanwhile I'll ignore that entry.\nThe entry in question is: $publicationentry\n\n";
        # this may actually happen despite good intentions, e.g., if
        # there is a hard-coded html hyperlink in one of the special
        # tags (e.g., the "online-information" key is a candidate for
        # that).
    }
}
print STDERR "Read $publicationcounter publication entries from BibTeX file.\n"; # report status

# we are not actually generating a complete and valid html file, just
# a snippet to paste into something bigger, but could do something like this:
# print "<html><head><title>My list of publications</title></head><body>\n";

# The sections of the listing in output order: the heading to print and the
# hash that holds the section's entries.
my @sections = (
    [ "<h3>Journal papers and book chapters</h3>\n\n", \%jpapers ],
    [ "\n\n<h3>Conference articles</h3>\n\n",          \%cpapers ],
    [ "\n\n<h3>Theses and reports</h3>\n\n",           \%miscpapers ],
);

# Publications are numbered in reverse across all sections. The numbering
# starts at the number of entries that are actually listed, which can be
# smaller than the number of entries read (entries without a year are not
# filed), so that the last publication always gets number 1.
my $listnumber = 0;
$listnumber += scalar keys %{ $_->[1] } for @sections;
print STDERR "Listing $listnumber of them.\n" if $listnumber != $publicationcounter;

foreach my $section (@sections) {
    my ($heading, $papers) = @$section;
    print $heading;
    # newest first: the keys start with the year
    foreach my $key (sort { $b cmp $a } keys %$papers) {
        print "<div class=\"pub\"><div class=\"pubCOUNTER\">[$listnumber]</div>\n" . $papers->{$key} . "</div>\n";
        $listnumber--;
    }
}

# print "</body></html>\n"; # we don't need this for our purposes
$bibdeskfile->close; # not sure that this isn't being taken care of by Text::BibTeX anyways