001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.nbio.genome.util;
023
024import org.biojava.nbio.core.sequence.DNASequence;
025import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
026import org.biojava.nbio.core.sequence.io.FastaWriterHelper;
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029
030import java.io.File;
031import java.util.ArrayList;
032import java.util.LinkedHashMap;
033import java.util.Map;
034
035
036/**
037 * Utility to write each Fasta entry to a unique file
038 * @author Scooter Willis 
039 */
040public class SplitFasta {
041
042        private static final Logger logger = LoggerFactory.getLogger(SplitFasta.class);
043
044        public void processNucleotides(File fastaFileName,String uniqueid, File outputDirectory ) throws Exception{
045                if(!outputDirectory.exists())
046                        outputDirectory.mkdirs();
047
048                Map<String,DNASequence> dnaSequenceHashMap = FastaReaderHelper.readFastaDNASequence(fastaFileName);
049                for(DNASequence dnaSequence : dnaSequenceHashMap.values()){
050                        String fileName = outputDirectory.getAbsolutePath() + File.separatorChar;
051                        if(uniqueid.length() > 0){
052                                fileName = fileName + dnaSequence.getAccession().getID() + ".fna";
053                        }else{
054                                fileName = fileName + uniqueid + dnaSequence.getAccession().getID() + ".fna";
055                        }
056                        ArrayList<DNASequence> dnaList = new ArrayList<>();
057                        dnaList.add(dnaSequence);
058                        FastaWriterHelper.writeNucleotideSequence(new File(fileName), dnaList);
059                }
060
061        }
062
063                public static void main( String[] args ){
064                try{
065                        SplitFasta splitFasta = new SplitFasta();
066                        splitFasta.processNucleotides(new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds.fna"), "", new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/individual"));
067                }catch(Exception e){
068                        logger.error("Exception: ", e);
069                }
070        }
071
072}