# # bioseq.txt - BIOSEQ entries for various databases (Version 1.2) # # This file contains BIOSEQ entries for a number of databases. Currently, # there are entries for EMBL, ENZYME, GenBank, GenPept (part of GenBank), # NRFES, NRL3D, PIR, PROSITE, REPBASE, SWISS-PROT and UTR (part of NRFES). # # To customize it for your installation, go through the entries and for # each entry: # 1) Change the root directory from "/databases/..." to the # directory where you have installed that database's files. # 2a) If one of the choices listed in the entry matches your database # file organization, uncomment the file/alias lines (if they # have been commented). # 2b) If your installation differed from the choices listed in the # entry, comment out any uncommented lines, add the names of # the database's files, and rewrite the aliases. # 3) Comment out (or change the name of) any index files you don't # want to use. Only include the index files for the identifiers # you want the users to be able to access entries by. # 4) Run idxseq for each of the databases to create the index files. # # In this version, only a few databases are given, but my hope is that # this file could become a canonical list of the databases, their # ftp/WWW locations or ordering information, and their file structure. # So, if you administer, or are familiar with, a database which either # is not included here, is incorrectly given here, or occurs in an ftp # site, CD-ROM or floppy disk release with another file structure, # please send e-mail to knight@cs.ucdavis.edu. # # # Virtual BIOSEQ entries used to create and store index files for # the NID and PID identifiers (to give random access to entries # using the "nid" and "pid" identifiers). 
# >NID >IdPrefix: nid >Index: /databases/nididx >PID >IdPrefix: pid >Index: /databases/pididx # # The EMBL Nucleotide Sequence Database # >EMBL: /databases/embl >Name: EMBL >IdPrefix: embl >Index: emblindex >Format: emblfast >Alphabet: DNA # # EMBL files as found at ftp site ftp.ebi.ac.uk in # /pub/databases/embl/release. # est?.dat, fun.dat, hum?.dat, inv.dat, mam.dat, org.dat, patent.dat, phg.dat, pln.dat, pro.dat, rod.dat, sts.dat, syn.dat, unc.dat vrl.dat, vrt.dat est:(est?.dat), est1:(est1.dat), est2:(est2.dat), est3:(est3.dat), est4:(est4.dat), est5:(est5.dat), est6:(est6.dat), fun:(fun.dat), hum:(hum?.dat), hum1:(hum1.dat), hum2:(hum2.dat), inv:(inv.dat), mam:(mam.dat), org:(org.dat), pat:(patent.dat), patent:(patent.dat), phg:(phg.dat), pln:(pln.dat), pri:(pri.dat), pro:(pro.dat), rod:(rod.dat), sts:(sts.dat), syn:(syn.dat), unc:(unc.dat), vrl:(vrl.dat), vrt:(vrt.dat) # # The ENZYME Database # >enzyme: /databases/enzyme >Name: ENZYME >IdPrefix: ec >Index: ecindex >Format: EMBL # # Enzyme files as found at ftp site ncbi.nlm.nih.gov in /repository/enzyme. # enzyme.dat # # The GenBank Flat-File Database # >GenBank,gb: /databases/genbank >Name: GenBank >IdPrefix: gb >Index: gbindex >Format: gbfast >Alphabet: DNA # # GenBank files as found at ftp site ncbi.nlm.nih.gov in /genbank. # Uncomment one of the alternatives for the between-release, daily # files (if you have those files). 
# gbbct.seq, gbest?.seq, gbinv.seq, gbmam.seq, gbpat.seq, gbphg.seq gbpln.seq, gbpri.seq, gbrna.seq, gbrod.seq, gbsts.seq, gbsyn.seq, gbuna.seq, gbvrl.seq, gbvrt.seq bct:(gbbct.seq), est:(gbest?.seq), est1:(gbest1.seq), est2:(gbest2.seq) est3:(gbest3.seq), est4:(gbest4.seq), est5:(gbest5.seq) est6:(gbest6.seq), est7:(gbest7.seq), inv:(gbinv.seq), mam:(gbmam.seq) pat:(gbpat.seq), phg:(gbphg.seq), pln:(gbpln.seq), pri:(gbpri.seq) rna:(gbrna.seq), rod:(gbrod.seq), sts:(gbsts.seq), syn:(gbsyn.seq) una:(gbuna.seq), vrl:(gbvrl.seq), vrt:(gbvrt.seq) ~bct:(gbbct.seq), ~est:(gbest?.seq), ~inv:(gbinv.seq), ~mam:(gbmam.seq) ~pat:(gbpat.seq), ~phg:(gbphg.seq), ~pln:(gbpln.seq), ~pri:(gbpri.seq) ~rna:(gbrna.seq), ~rod:(gbrod.seq), ~sts:(gbsts.seq), ~syn:(gbsyn.seq) ~una:(gbuna.seq), ~vrl:(gbvrl.seq), ~vrt:(gbvrt.seq) # daily/gbcu.flat, daily:(daily/gbcu.flat) # daily-nc/nc????.flat, daily:(daily-nc/nc????.flat) # # The GenPept Protein Translation of GenBank coding sequences. # >GenPept: /databases/genbank >Name: GenPept >IdPrefix: gp >Index: genpeptindex >Format: FASTA >Alphabet: Protein # # GenPept files as found at ftp site ncbi.nlm.nih.gov in /genbank. # genpept.fsa, daily/gpcu.fsa # # The Non-Redundant Functionally Equivalent Sequences (NRFES) Database. # >NRFES: /databases/NRFES >Name: NRFES >IdPrefix: gb >Index: nrfesindex >Format: NBRF >Alphabet: DNA # # NRFES files as found at ftp site ncbi.nlm.nih.gov in /repository/NRFES. 
# all_v05/(bcta, inva, mama, orga, phga, plna, pria, roda, vrla, vrta, yeaa) cds_v05/(bctc, invc, mamc, orgc, phgc, plnc, pric, rodc, vrlc, vrtc, yeac) exo_v05/(inve, mame, orge, plne, prie, rode, vrle, vrte, yeae) ivs_v05/(invi, mami, orgi, plni, prii, rodi, vrli, vrti, yeai) ~:(bcta, inva, mama, orga, phga, plna, pria, roda, vrla, vrta, yeaa) all:(bcta, inva, mama, orga, phga, plna, pria, roda, vrla, vrta, yeaa) cds:(bctc, invc, mamc, orgc, phgc, plnc, pric, rodc, vrlc, vrtc, yeac) exo:(inve, mame, orge, plne, prie, rode, vrle, vrte, yeae) ivs:(invi, mami, orgi, plni, prii, rodi, vrli, vrti, yeai) bct:(bcta), inv:(inva), mam:(mama), org:(orga), phg:(phga), pln:(plna) pri:(pria), rod:(roda), vrl:(vrla), vrt:(vrta), yea:(yeaa) # # The NRL_3D Protein Sequence--Structure Database (a mirror of the PDB # database in PIR/CODATA file format). # >NRL3D: /databases/pir >Name: NRL3D >IdPrefix: pdb >Index: nrl3dindex >Format: pirfast >Alphabet: Protein # # NRL_3D files as included in the PIR release found at ftp site # ncbi.nlm.nih.gov in /repository/PIR. # nrl3d.dat # # The Protein Information Resource (PIR) Database. # >PIR: /databases/pir >Name: PIR >IdPrefix: pir >Index: pirindex >Format: pirfast >Alphabet: Protein # # PIR files as found at ftp site ncbi.nlm.nih.gov in /repository/PIR. # pir1.dat, pir2.dat, pir3.dat pir4.dat ~1:(pir1.dat), ~2:(pir2.dat), ~3:(pir3.dat), ~4:(pir4.dat) ~12:(pir1.dat,pir2.dat), ~13:(pir1.dat,pir3.dat), ~23:(pir2.dat,pir3.dat) ~123:(pir1.dat,pir2.dat,pir3.dat) # # The PROSITE Pattern Database. # >PROSITE: /databases/prosite >Name: PROSITE >IdPrefix: pros >Index: prositeindex >Format: EMBL # # PROSITE files as found at ftp site ncbi.nlm.nih.gov in # /repository/prosite. # prosite.dat # # The REPBASE Repetitive Element Database. # >repbase: /databases/repbase >Name: REPBASE >IdPrefix: rpb >Index: repbaseindex >Format: EMBL # # REPBASE files as found at ftp site ncbi.nlm.nih.gov in # /repository/repbase. 
# MAIN/(B1.rodent, B2.rodent, L1.primate, MIR.mammal, MIR.primate, MIR.rodent, MIR2.primate, THE.mammal, THE.primate, THE.rodent, THR.human, alu.galago, alu.human, alu.other) MER/MER*.pri REF/(humrep.ref, invrep.ref, mamrep.ref, plnrep.ref, rodrep.ref, simple.ref, vertrep.ref) B1:(B1.rodent), B2:(B2.rodent), L1:(L1.primate) MIR:(MIR.mammal, MIR.primate, MIR.rodent), MIR2:(MIR2.primate) THE:(THE.mammal, THE.primate, THE.rodent), THR:(THR.human) alu:(alu.galago, alu.human, alu.other) rodent:(B1.rodent, B2.rodent, MIR.rodent), rod:(B1.rodent, B2.rodent, MIR.rodent), primate:(L1.primate, MIR.primate, MIR2.primate, THE.primate) pri:(L1.primate, MIR.primate, MIR2.primate, THE.primate) mammal:(MIR.mammal, THE.mammal), mam:(MIR.mammal, THE.mammal) human:(THR.human, alu.human) # # The SWISS-PROT Protein Sequence Data Bank. # >SWISS-PROT,swissprot,sprot: /databases/swiss-prot >Name: SWISS-PROT >IdPrefix: sp >Index: sprotindex >Format: spfast >Alphabet: Protein # # SWISS-PROT files as found at ftp site ncbi.nlm.nih.gov in # /repository/swiss-prot. The ?? match the release number of # the database. # sprot??.dat # updates/new-seq.dat # # The UTR Database (UnTRanslated regions of protein coding genes). # >UTR: /databases/NRFES/utr >Format: FASTA >Index: utrindex >Alphabet: DNA # # UTR files as included in the NRFES release found at ftp site # ncbi.nlm.nih.gov in /repository/NRFES/utr. 
# amp_3p, bird_3p, dro_3p, fish_3p, inse_3p, mam_3p, mollu_3p mou_3p, nema_3p, prot_3p, rat_3p amp_5p, bird_5p, dro_5p, fish_5p, inse_5p, mam_5p, mollu_5p mou_5p, nema_5p, prot_5p, rat_5p ~_3p:(amp_3p, bird_3p, dro_3p, fish_3p, inse_3p, mam_3p, mollu_3p, mou_3p, nema_3p, prot_3p, rat_3p) ~_5p:(amp_5p, bird_5p, dro_5p, fish_5p, inse_5p, mam_5p, mollu_5p, mou_5p, nema_5p, prot_5p, rat_5p) 3p:(amp_3p, bird_3p, dro_3p, fish_3p, inse_3p, mam_3p, mollu_3p, mou_3p, nema_3p, prot_3p, rat_3p) 5p:(amp_5p, bird_5p, dro_5p, fish_5p, inse_5p, mam_5p, mollu_5p, mou_5p, nema_5p, prot_5p, rat_5p) amp:(amp_3p,amp_5p), bird:(bird_3p,bird_5p), dro:(dro_3p,dro_5p) fish:(fish_3p,fish_5p), inse:(inse_3p,inse_5p), mam:(mam_3p,mam_5p) mollu:(mollu_3p,mollu_5p), mou:(mou_3p,mou_5p), nema:(nema_3p,nema_5p) prot:(prot_3p,prot_5p), rat:(rat_3p,rat_5p)