{"id":97,"date":"2017-02-24T00:24:20","date_gmt":"2017-02-24T00:24:20","guid":{"rendered":"http:\/\/uenf.br\/projetos\/biotecveg\/?page_id=97"},"modified":"2017-02-24T15:52:27","modified_gmt":"2017-02-24T15:52:27","slug":"fasta-extractor","status":"publish","type":"page","link":"https:\/\/uenf.br\/projetos\/biotecveg\/fasta-extractor\/","title":{"rendered":"Fasta Extractor"},"content":{"rendered":"<div>&lt;?php<br \/>\n\/\/ FASTA sequence extractor<br \/>\n\/\/ This will take a multi-sequence FASTA file and a list of headers as input<br \/>\n\/\/ and extract the sequences from the file<\/p>\n<p>require(&#8216;.\/tools\/seqtools.php&#8217;);\/\/ include sequence functions, parsing etc.<br \/>\nrequire(&#8216;.\/tools\/formtools.php&#8217;);\/\/ include sequence functions, parsing etc.<\/p>\n<p>\/\/ Form buttons from main form<br \/>\n$submit = $_POST[&#8216;submit&#8217;];<br \/>\n$submit_example = $_POST[&#8216;submit_example&#8217;];<br \/>\n$search=$_POST[&#8216;search&#8217;];<\/p>\n<p>if (get_magic_quotes_gpc()){<br \/>\n$search = stripslashes($search);<br \/>\n}<\/p>\n<p>$search=chop($search);<\/p>\n<p>function parse_sequences ($inputseqs) {<br \/>\n$sequences = parse_fasta($inputseqs, 0, &#8220;-&#8220;); \/\/ returns array of sequences, each consist of two elements: header and sequence, i.e. $var[0][0] = header of seq 1, $var[0][1] = sequence 1 etc&#8230;<br \/>\nif (count($sequences) ==0){<br \/>\n$error = &#8220;Couldn&#8217;t parse FASTA input correctly, please submit fasta formatted sequences&#8221;;<br \/>\nreturn array(1,$error);<br \/>\n}<br \/>\nreturn array(0,$sequences);<br \/>\n}<\/p>\n<p>function extract_fasta($seqs, $s, $case=&#8221;&#8221;) {<br \/>\nif ($case) {$case=&#8221;i&#8221;;}<br \/>\n$s=explode(&#8220;\\n&#8221;, $s);<br \/>\nfor ($i=0; $i&#8217;) === 0) {<br \/>\n$s[$i]=substr($s[$i], 1); \/\/ remove &#8216;&gt;&#8217; in the beginning of search text lines<br \/>\n}<br \/>\n}<br \/>\nif (count($s)==1 and $s[0]==&#8221;&#8221;) {<br \/>\nreturn array(1,&#8221;Nothing to extract&#8221;);<br \/>\n}<br \/>\n$newseqs=array(); \/\/ Init<br \/>\nfor ($i=0; $i&lt;count($seqs); $i++) {\/\/ loop through all sequences<br \/>\n$current_seqheader=$seqs[$i][0];<br \/>\nif ($_POST[&#8216;fuzzy&#8217;]) {<br \/>\n\/\/<br \/>\n\/\/ Do partial match, i.e. match query as only part of header<br \/>\n\/\/<br \/>\nfor ($j=0; $j&#8221;.$current[0].&#8221;\\n&#8221;;<br \/>\n$fullset .= wrap_sequence($current[1], 70, &#8220;\\n&#8221;);<br \/>\n$fullset .= &#8220;\\n&#8221;;<br \/>\n}<\/p>\n<p>$r[&#8216;text&#8217;] = $fullset;<br \/>\n$r[&#8216;html&#8217;] = &#8221;<\/p>\n<pre>\".$fullset.\"<\/pre>\n<p>\\n&#8221;;<\/p>\n<p>$r[&#8216;textlabel&#8217;]=&#8221;Sequences in fasta format&#8221;;<br \/>\n$r[&#8216;htmllabel&#8217;]=&#8221;Sequences in fasta format&#8221;;<\/p>\n<p>$r[&#8216;filename&#8217;]=&#8221;extracted_sequences.fasta&#8221;;<br \/>\n$r[&#8216;filelabel&#8217;]=&#8221;Sequences in fasta format&#8221;;<br \/>\nreturn array(0,$r);<br \/>\n}<\/p>\n<p>if ($submit) {<br \/>\n$r= parse_sequences($inputseqs);<br \/>\nif ($r[0]==1) { $error=$r[1]; }<br \/>\n$sequences=$r[1];<br \/>\n$r= extract_fasta($sequences, $search, $_POST[&#8216;case&#8217;]);<br \/>\nif ($r[0]==1) { $error=$r[1]; }<br \/>\n$sequences_new= $r[1];<br \/>\nunset($r);<br \/>\n}<br \/>\n\/\/<br \/>\n\/\/ BEGIN HTML OUTPUT!<br \/>\n\/\/<br \/>\nshow_page_header(&#8220;FASTA sequence extractor&#8221;, &#8220;<a href=\"#formtools.results\">Analysis finished, results are at bottom (or click here)<\/a>&#8220;, $submit, $error);<br \/>\n?&gt;<\/p>\n<form action=\"#\" enctype=\"multipart\/form-data\" method=\"POST\"><!-- List of sequence headers to get sequences from --><\/p>\n<fieldset>\n<legend>Headers of sequences you want to extract (&#8216;&gt;&#8217; are ignored)<\/legend>\n<p><label>Partial match headers (slower)<\/label>&lt;input type=&#8221;checkbox&#8221; name=&#8221;fuzzy&#8221; &gt;<\/p>\n<p><label>..and case insensitive<\/label>&lt;input type=&#8221;checkbox&#8221; name=&#8221;case&#8221; &gt;<\/p>\n<p><textarea cols=\"40\" name=\"search\" rows=\"10\">&lt;\/p&gt;<br \/>\n&lt;p&gt;<\/textarea><\/p>\n<p>&nbsp;<\/fieldset>\n<p><!-- Settings --><\/p>\n<fieldset>\n<legend>Settings<\/legend>\n<\/fieldset>\n<\/form>\n<p>&lt;?php<br \/>\nif ($error) {<br \/>\nprint &#8221;<\/p>\n<h1><span style=\"color: #aa0011\"><br \/>\n$error<\/span><\/h1>\n<p>\\n&#8221;;<br \/>\n}<br \/>\nif ($submit and !$error) {<br \/>\n$b[] = get_save_button($sequences_new, &#8216;r1&#8217;);<br \/>\nprint get_save_form($b, &#8220;Save new sequences&#8221;);<br \/>\nprint get_results_from_buttons($b);<\/p>\n<p>}<\/p>\n<p>show_page_footer();<br \/>\n?&gt;<\/p><\/div>\n","protected":false},"excerpt":{"rendered":"<p>&lt;?php \/\/ FASTA sequence extractor \/\/ This will take a multi-sequence FASTA file and a list of headers as input&#8230;<\/p>\n","protected":false},"author":9,"featured_media":0,"parent":0,"menu_order":6,"comment_status":"closed","ping_status":"closed","template":"","meta":{"footnotes":""},"class_list":["post-97","page","type-page","status-publish","hentry"],"_links":{"self":[{"href":"https:\/\/uenf.br\/projetos\/biotecveg\/wp-json\/wp\/v2\/pages\/97","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/uenf.br\/projetos\/biotecveg\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/uenf.br\/projetos\/biotecveg\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/uenf.br\/projetos\/biotecveg\/wp-json\/wp\/v2\/users\/9"}],"replies":[{"embeddable":true,"href":"https:\/\/uenf.br\/projetos\/biotecveg\/wp-json\/wp\/v2\/comments?post=97"}],"version-history":[{"count":0,"href":"https:\/\/uenf.br\/projetos\/biotecveg\/wp-json\/wp\/v2\/pages\/97\/revisions"}],"wp:attachment":[{"href":"https:\/\/uenf.br\/projetos\/biotecveg\/wp-json\/wp\/v2\/media?parent=97"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}