Fasta Extractor

<?php
// FASTA sequence extractor
// This will take a multi-sequence FASTA file and a list of headers as input
// and extract the sequences from the file

// Shared helper libraries.
require('./tools/seqtools.php');  // sequence functions, parsing etc.
require('./tools/formtools.php'); // form/page helpers (assumed from the file name - confirm)

// Form fields posted by the main form. They are absent on the first page
// load, so fall back to an empty string instead of reading a missing index.
$submit = isset($_POST['submit']) ? $_POST['submit'] : '';
$submit_example = isset($_POST['submit_example']) ? $_POST['submit_example'] : '';
$search = (isset($_POST['search']) && is_string($_POST['search'])) ? $_POST['search'] : '';

// Magic quotes only exist before PHP 5.4; get_magic_quotes_gpc() is deprecated
// in 7.4 and removed in 8.0, so only call it where it can still return true.
if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
    $search = stripslashes($search);
}

// Drop trailing whitespace/newlines from the textarea content.
$search = rtrim($search);

/**
 * Parse FASTA-formatted text into a list of sequences.
 *
 * Delegates to parse_fasta(), which returns an array of sequences where each
 * entry has two elements: [0] = header, [1] = sequence.
 *
 * @param string $inputseqs Raw FASTA text submitted by the user.
 * @return array Status tuple: array(0, $sequences) on success, or
 *               array(1, $errorMessage) when no sequence could be parsed.
 */
function parse_sequences($inputseqs)
{
    $sequences = parse_fasta($inputseqs, 0, "-");

    // Treat a non-array result like an empty one: count() on a non-array
    // raises a TypeError on PHP 8.
    if (!is_array($sequences) || count($sequences) === 0) {
        $error = "Couldn't parse FASTA input correctly, please submit fasta formatted sequences";
        return array(1, $error);
    }

    return array(0, $sequences);
}

/**
 * Extract the sequences whose headers match one of the requested headers.
 *
 * The search text holds one header per line; a leading '>' on a line is
 * ignored. When $_POST['fuzzy'] is set, a query matches if it occurs anywhere
 * in the header; otherwise the whole header must equal the query.
 *
 * NOTE(review): the header-matching loop of the original was lost (markup
 * stripping removed everything between '<' and '>'), so the matching below is
 * a reconstruction using literal string comparison - confirm against the
 * upstream source. The no-match error message and the <pre> wrapper are part
 * of that reconstruction.
 *
 * @param array  $seqs Parsed sequences, each entry array(header, sequence).
 * @param string $s    Newline-separated list of headers to extract.
 * @param mixed  $case Truthy to compare case-insensitively.
 * @return array Status tuple: array(1, $errorMessage) on failure, or
 *               array(0, $r) where $r has the keys 'text', 'html',
 *               'textlabel', 'htmllabel', 'filename' and 'filelabel'.
 */
function extract_fasta($seqs, $s, $case = "")
{
    $fuzzy = !empty($_POST['fuzzy']);
    $ignore_case = (bool) $case;

    // Normalise the query list: one query per line, without surrounding
    // whitespace (textarea input carries "\r"), without the leading '>' and
    // without blank lines (an empty needle would match every header).
    $queries = array();
    foreach (explode("\n", (string) $s) as $line) {
        $line = trim($line);
        if (strpos($line, '>') === 0) {
            $line = trim(substr($line, 1)); // remove '>' in the beginning of search text lines
        }
        if ($line !== '') {
            $queries[] = $line;
        }
    }
    if (count($queries) === 0) {
        return array(1, "Nothing to extract");
    }

    if (!is_array($seqs)) {
        $seqs = array();
    }

    $newseqs = array(); // Init
    foreach ($seqs as $current) { // loop through all sequences
        $current_seqheader = trim((string) $current[0]);
        foreach ($queries as $query) {
            if ($fuzzy) {
                // Partial match, i.e. match query as only part of header
                $pos = $ignore_case
                    ? stripos($current_seqheader, $query)
                    : strpos($current_seqheader, $query);
                $matched = ($pos !== false);
            } else {
                // Exact match of the complete header
                $matched = $ignore_case
                    ? (strcasecmp($current_seqheader, $query) === 0)
                    : ($current_seqheader === $query);
            }
            if ($matched) {
                $newseqs[] = $current;
                break; // emit each sequence once, even if several queries match
            }
        }
    }

    if (count($newseqs) === 0) {
        return array(1, "No sequences matched the given headers");
    }

    // Re-serialise the selected sequences as FASTA, 70 residues per line.
    $fullset = '';
    foreach ($newseqs as $current) {
        $fullset .= ">" . $current[0] . "\n";
        $fullset .= wrap_sequence($current[1], 70, "\n");
        $fullset .= "\n";
    }

    $r = array();
    $r['text'] = $fullset;
    // Headers come from user input and contain '>', so escape them for HTML.
    $r['html'] = "<pre>\n" . htmlspecialchars($fullset, ENT_QUOTES, 'UTF-8') . "\n</pre>\n";

    $r['textlabel'] = "Sequences in fasta format";
    $r['htmllabel'] = "Sequences in fasta format";

    $r['filename'] = "extracted_sequences.fasta";
    $r['filelabel'] = "Sequences in fasta format";
    return array(0, $r);
}

// Make sure the variables read further down always exist, including on the
// first page load when nothing was submitted.
$error = isset($error) ? $error : '';
$sequences_new = null;

if ($submit) {
    // NOTE(review): $inputseqs is not assigned anywhere in this file;
    // presumably one of the included tool files provides it - confirm.
    $r = parse_sequences(isset($inputseqs) ? $inputseqs : '');
    if ($r[0] == 1) {
        // Parsing failed: $r[1] is an error message, not a sequence list,
        // so do not attempt the extraction.
        $error = $r[1];
    } else {
        $sequences = $r[1];
        $r = extract_fasta($sequences, $search, isset($_POST['case']) ? $_POST['case'] : '');
        if ($r[0] == 1) {
            $error = $r[1];
        } else {
            $sequences_new = $r[1];
        }
    }
    unset($r);
}
//
// BEGIN HTML OUTPUT!
//
show_page_header("FASTA sequence extractor", "Analysis finished, results are at bottom (or click here)", $submit, $error);
?>

Headers of sequences you want to extract (‘>’ are ignored)

<input type="checkbox" name="fuzzy" >

<input type="checkbox" name="case" >

 

Settings

<?php
// Show the error message, if any.
// NOTE(review): any markup that originally wrapped the message is not
// present in this copy of the file - confirm against the upstream source.
if (!empty($error)) {
    print "\n\n\n$error\n\n\n";
}

// On a successful submission, offer the extracted sequences for saving and
// print the result panels built from the same button list.
if (!empty($submit) && empty($error)) {
    $b = array();
    $b[] = get_save_button($sequences_new, 'r1');
    print get_save_form($b, "Save new sequences");
    print get_results_from_buttons($b);
}

show_page_footer();
?>