#!/bin/bash
# FINDBIB 2.3, Aug, 2007 K. Goldstein
#
################ FINDBIB 2.0, F. Nesti, August, 26, 1997 ###############
#
# This script builds a bibliography database from the latex source file.
#
# Usage: findbib [-ah] foo.tex
#
# Every \cite{...} of a reference in SPIRES-Bibtex standard form
# (Author:YEARaa) or in the form of an arXiv reference
# (arXiv-name/yymmnnn OR yymmnnn OR yymm.nnnn)
# is searched in WWW-SPIRES.SLAC.STANFORD.EDU and all the bibtex records
# are returned in a .BIB file.
#
# The value of the variable ARXIV sets the default arxiv to search if the
# cite is of the form \cite{yymmxxx}
#
# To change the default arxiv change the value below:
#
ARXIV='hep-th/'
#ARXIV='hep-ph/'
#
# The .BIB file will have the same name as the .TEX file
#
# The script uses awk, sed, lynx, bibtex and some LaTeX. You will have to
# install these to make sure it works.
#
####################################################################
#
# How the script works:
#
# Steps:
#   Generate an AUX file with LaTeX if not present,
#   search the AUX file for labels,
#   search SPIRES for records,
#   write them in $1.bib, renaming the previous one as $1.bib.old,
#   call BibTeX on the file (!).
#
#######################################################################
#
# FINDBIB 2.1, May, 2007 K. Goldstein
#
# Changes:
#
# 1. Script updated to fix some changes on Spires website
#
# 2. Uses sort to remove duplicate entries
#
# 3. Can search for author with double barrel surnames
#
# 4. every cite of the form \cite{yymmxxx} returns a hep-th reference.
#    To change the default arxiv change the value of ARXIV
#
# 5. every cite of the form \cite{yymm.xxxx} returns a new arxiv reference
#
# 6. Gratuitous comments added so that the script is easy to fix
#    when spires changes their format
#
#######################################################################
# FINDBIB 2.2 : fixed some bugs
#######################################################################
#######################################################################
# FINDBIB 2.3 : added some flags
#######################################################################
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation (version 2)
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#
########################################
# The main script is the main() function at the end of this file.
#
# Globals shared between the functions below:
#   ARXIV        default arXiv prefix for bare yymmnnn labels
#   VERSION      version string printed in the usage text and headers
#   ADD          1 when -a was given (only add new citations), else 0
#   TEXF         path of the TeX file without the .tex suffix
#   FILE         path of the .aux file
#   BIB          path of the .bib file
#   SEARCH       search term produced by parse_label
#   flag         set to 1 when the search for the current label is over
#   label        the label currently being processed
#   USER_AGENT   user agent string handed to lynx
#   LABELS_FILE  scratch file holding one label per line
#   OUT_FILE     scratch file holding the last downloaded result page

# The original shebang passed -f (noglob) to bash. It is set here instead so
# that it still applies when the script is started as "bash findbib".
set -f

VERSION=2.3

# Useful functions defined:

# Print usage information
usage() {
  echo "findbib $VERSION"
  echo "---"
  echo "Usage: findbib [-ah] texfile.tex"
  echo
  echo "Options:"
  echo
  echo " -a : Only add new citations"
  echo " -h : Print this message"
}

# extract the labels from the .aux file
get_labels() {
  ###################
  # $1 - the .bib file
  # $2 - the .aux file
  # $3 - flag to see whether we should just get new labels (0 = get all)
  ###################
  if [[ "$3" = 0 ]]; then
    get_all_labels "$1" "$2"
  else
    get_new_labels "$1" "$2"
  fi
}

# function to process the labels
parse_aux() {
  ###################
  # $1 - the .aux file
  # Writes every distinct \citation{...} key, one per line, to $LABELS_FILE.
  #
  # NOTE(review): only the banner and the start of the awk command survived
  # in the damaged source; the rest of this pipeline is a reconstruction.
  # Please compare against an upstream copy of findbib 2.3.
  ###################
  echo "---------- EXTRACTING LABELS FROM $1: -------------"
  # Split each \citation line on "}" so that several \citation{...} groups on
  # one line are handled, strip everything up to the opening brace, and break
  # comma separated key lists into single keys.
  awk -F'}' '/\\citation\{/ { for (i = 1; i < NF; i++) print $i }' "$1" \
    | sed -n -e 's/^.*\\citation{//p' \
    | tr ',' '\n' \
    | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^$/d' \
    | sort -u > "$LABELS_FILE"
}

# NOTE(review): get_all_labels and get_new_labels were lost in the damaged
# source. They are reconstructed from how get_labels calls them and from the
# "-a : Only add new citations" option text; confirm against upstream.

# collect every label cited in the .aux file
get_all_labels() {
  ###################
  # $1 - the .bib file (unused here, kept for a signature parallel to
  #      get_new_labels)
  # $2 - the .aux file
  ###################
  parse_aux "$2"
}

# collect only the labels that have no record in the .bib file yet
get_new_labels() {
  ###################
  # $1 - the .bib file
  # $2 - the .aux file
  ###################
  local _bib=$1 _all _candidate
  parse_aux "$2"
  # Without an existing .bib file every label is new.
  [[ -f "$_bib" ]] || return 0
  _all="$LABELS_FILE.all"
  mv -f -- "$LABELS_FILE" "$_all" || return 1
  # Records are stored as "@Article{<label>,", so look for "{<label>,".
  while IFS= read -r _candidate; do
    grep -qF -- "{${_candidate}," "$_bib" || printf '%s\n' "$_candidate"
  done < "$_all" > "$LABELS_FILE"
  rm -f -- "$_all"
}

# print a header to the bibtex file
print_header() {
  ###################
  # $1 - the .bib file
  # $2 - 0: start a fresh file, anything else: append to the existing file
  #
  # NOTE(review): the first lines of this function were lost in the damaged
  # source. Renaming the previous file to $1.old follows the description in
  # the header of this script; confirm against upstream.
  ###################
  local _stamp
  _stamp=$(date +"%x, AT %X.")
  if [[ "$2" = 0 ]]; then
    if [[ -f "$1" ]]; then
      mv -f -- "$1" "$1.old"
    fi
    echo "%%" > "$1"
    echo "%% BIBTEX FILE FOR $TEXF.tex GENERATED BY FINDBIB $VERSION ON" \
      "$_stamp" >> "$1"
    echo "%%" >> "$1"
  else
    echo "%%" >> "$1"
    echo "%% REFERENCES FOR $TEXF.tex ADDED BY FINDBIB $VERSION ON" \
      "$_stamp" >> "$1"
    echo "%%" >> "$1"
  fi
}

# convert the label into a search term
parse_label() {
  ###################
  # $1 - a label extracted from the .aux file
  # Sets SEARCH to the search term, or sets flag=1 when the label is in
  # none of the known formats. Echoes what was recognised.
  ###################
  local _author _rest _date _key _author_space

  ###########################################
  # is the record of the form Author:yyyyxx ?
  ###########################################
  if [[ $1 = *:[0-9][0-9][0-9][0-9][a-z][a-z]* ]]; then
    _author=${1%%:*}
    _rest=${1#*:}
    _date=${_rest%%[a-z]*}
    _key=${_rest:4}
    # add a space in two word names (DeWit -> De Wit):
    _author_space=$(sed -e 's/\([a-z]\)\([A-Z]\)/\1 \2/g' <<< "$_author")
    SEARCH="A+${_author_space}+AND+DATE+${_date}"
    echo "$_author_space" "$_date" "$_key"
  ###########################################
  # is the record of the form arxiv-name/yymmxxx ?
  ###########################################
  elif [[ $1 = *-*/[0-9][0-9][0-9][0-9][0-9][0-9][0-9] ]]; then
    SEARCH="EPRINT $1"
    echo "$1"
  ###########################################
  # is the record of the form yymmxxx ?
  ###########################################
  elif [[ $1 = [0-9][0-9][0-9][0-9][0-9][0-9][0-9] ]]; then
    SEARCH="EPRINT $ARXIV$1"
    echo "$1"
  ###########################################
  # is the record of the new form yymm.xxxx (or yymm.xxxxx) ?
  ###########################################
  elif [[ $1 = [0-9][0-9][0-9][0-9]\.[0-9][0-9][0-9][0-9] \
       || $1 = [0-9][0-9][0-9][0-9]\.[0-9][0-9][0-9][0-9][0-9] ]]; then
    SEARCH="EPRINT $1"
    echo "$1"
  ###########################################
  # is the record some other format ?
  ###########################################
  else
    echo "$1: PERSONAL LABEL"
    flag=1
  fi
}

# randomly pick a mirror
get_url() {
  ###################
  # $1 - name of the variable that receives the base search URL
  #
  # NOTE(review): these are the mirrors listed in the original script; they
  # may no longer be reachable - confirm before relying on them.
  ###################
  local -a _mirrors=(
    "http://www-library.desy.de"
    "http://www-spires.fnal.gov"
    "http://usparc.ihep.su"
    "http://www-spires.dur.ac.uk"
    "http://www.yukawa.kyoto-u.ac.jp"
    "http://www.slac.stanford.edu"
  )
  local _base=${_mirrors[RANDOM % ${#_mirrors[@]}]}
  echo "Querying $_base"
  # printf -v assigns to the caller's variable without eval.
  printf -v "$1" '%s' "${_base}/spires/find/hep/www?"
}

# download a result page
download_search() {
  #####################
  # Download a webpage with the search term $1
  # starting at result $2, into $OUT_FILE
  #####################
  local _url _options
  # get a random mirror:
  get_url _url
  _options="rawcmd=$1&skip=$2&FORMAT=wwwbriefbibtex"
  # download page; spaces are removed from the "@Article{" line so that the
  # label can be matched later. stdin is detached because the caller is
  # reading the label list from stdin.
  lynx -useragent="$USER_AGENT" -source "${_url}${_options}" < /dev/null \
    | sed -e '/@Article{/ s/ //g' > "$OUT_FILE"
  # sleep for 0-2 seconds between searches so we don't stress the servers
  sleep $((RANDOM % 3))
}

# read the number of hits from the downloaded page
get_hits() {
  ###################
  # $1 - name of the variable that will be set to the number of hits.
  # The variable is left untouched when the page reports no count.
  ###################
  local _count
  # The page reports "Paper <i> of <N>"; keep N only.
  _count=$(grep Paper "$OUT_FILE" \
    | sed -n -e 's/^.*of \([0-9][0-9]*\).*/\1/p' \
    | sort -un \
    | tail -n 1)
  if [[ $_count =~ ^[0-9]+$ ]]; then
    # 10# forces base ten so a leading zero is not read as octal.
    printf -v "$1" '%d' "$((10#$_count))"
  fi
}

# append the record of an eprint search to the .bib file
find_eprint_record() {
  # The record is stored under the label used in the TeX file, so the
  # "@Article{...," line from the server is replaced by our own.
  # Only write the header when the page really contains a record;
  # otherwise a dangling "@Article{label," would corrupt the .bib file.
  if grep -q '@Article{' "$OUT_FILE"; then
    echo "@Article{$label," >> "$BIB"
    awk '/@Article\{/,/^}/' "$OUT_FILE" \
      | awk '!/@Article/' \
      | tee -a "$BIB" \
      | grep title | sed 's/  */ /g'
  else
    echo "PAPER NOT FOUND"
  fi
}

# append the record whose key is $label to the .bib file
find_record() {
  # Print from the line containing the label up to the closing "}" of the
  # record. The label is matched literally instead of being spliced into
  # the awk program as a regular expression.
  awk -v key="$label" '
    index($0, key) { inside = 1 }
    inside         { print }
    /^}/           { inside = 0 }
  ' "$OUT_FILE" \
    | tee -a "$BIB" \
    | grep title | sed 's/  */ /g'
}

# work out the file names from the command line argument
process_filename() {
  ###################
  # $1 - the TeX file given on the command line
  # Sets TEXF, FILE and BIB. Runs latex when there is no .aux file yet.
  # Returns non-zero when no usable file was given.
  ###################
  if [[ -z "$1" ]]; then
    usage
    return 1
  fi
  local _stem=${1%.tex}
  if [[ ! -f "${_stem}.aux" ]]; then
    if [[ ! -f "$1" ]]; then
      echo "No such file $1."
      return 1
    fi
    latex "$1"
    if [[ ! -f "${_stem}.aux" ]]; then
      echo "latex did not produce ${_stem}.aux."
      return 1
    fi
  fi
  TEXF=$_stem
  FILE=${_stem}.aux
  BIB=${_stem}.bib
}

# check that a program is installed
test_dependancy() {
  ###################
  # $1 - name of the program
  # Returns 0 when the program can be run, non-zero otherwise.
  ###################
  command -v "$1" > /dev/null 2>&1
}

########################################################
# The main script
main() {
  local _dep _opt _lynx_version _offset _hits
  local OPTIND=1

  # test dependencies
  for _dep in lynx sed awk; do
    if ! test_dependancy "$_dep"; then
      echo "Findbib needs $_dep to work. Please install $_dep"
      return 1
    fi
  done

  # Boundary conditions
  ADD=0

  # Process flags:
  while getopts "b:ah" _opt; do
    case "$_opt" in
      b) BIBFILE=$OPTARG ;; # Unimplemented
      a) ADD=1 ;;
      h)
        usage
        return 0
        ;;
      *)
        usage
        return 1
        ;;
    esac
  done
  # OPTIND counts the consumed words correctly for "-ah" and "-bVALUE" too.
  shift $((OPTIND - 1))

  # Process the name of the texfile
  process_filename "$1" || return 1

  _lynx_version=$(lynx -version | head -n 1 | awk '{print $3}')
  USER_AGENT="Findbib_${VERSION}_(Lynx/${_lynx_version})"

  # Scratch files live in a private directory that is removed on any exit.
  WORK_DIR=$(mktemp -d) || {
    echo "Cannot create a temporary directory."
    return 1
  }
  LABELS_FILE="$WORK_DIR/bib.labels"
  OUT_FILE="$WORK_DIR/out"
  trap 'rm -rf -- "$WORK_DIR"' EXIT

  # get citations from .aux file
  get_labels "$BIB" "$FILE" "$ADD" || return 1

  # print a header to the bibtex file
  print_header "$BIB" "$ADD"

  # start getting the records from spires
  echo "---------- REQUESTING RECORDS FROM spires.slac.stanford.edu:"
  while IFS= read -r label; do
    echo "------------------------------------------------------------"
    flag=0
    echo "Searching for label:"
    _offset=0
    _hits=0
    # parse the label and set $SEARCH to the SPIRES search term
    parse_label "$label"

    # start a loop to search through the results, 25 per page
    while [[ "$flag" = 0 ]]; do
      download_search "$SEARCH" "$_offset"
      _offset=$((_offset + 25))

      # find out the number of hits for our search
      if ((_hits == 0)); then
        get_hits _hits
        echo "$_hits" "record(s) found "
      fi
      if ((_hits > 25)); then
        echo "Searching-----> " "$_offset"
      fi

      if ((_hits == 0)); then
        # there were no results at all
        flag=1
        echo "PAPER NOT FOUND."
      elif grep -qF -- "$label" "$OUT_FILE"; then
        flag=1
        echo "Found:"
        if [[ $SEARCH = EPRINT* ]]; then
          find_eprint_record
        else
          find_record
        fi
      elif ((_offset > _hits)); then
        # there are no papers left to look at
        flag=1
        echo " PAPER NOT FOUND."
      fi
    done
  done < "$LABELS_FILE"

  # clean up
  rm -rf -- "$WORK_DIR"
  trap - EXIT

  echo "---------- DONE. RECORDS WRITTEN TO $BIB."
  echo "---------- NOW RUNNING BIBTEX:"
  bibtex "$TEXF"
}

main "$@"