Showing posts with label node.js. Show all posts
Showing posts with label node.js. Show all posts

06 September 2011

Parsing a BAM file with javascript, yes we can. (Node.js and V8)

Node.js is an event-driven I/O server-side JavaScript environment based on V8, Google's open source JavaScript engine. In the current post I will describe how I've used Node/V8 to parse a BAM file. Here I've used node v0.5.5 and my code is hosted in a new git repository bionode.

Designing a C++ native extension for Node.js wrapping the bgzf format

BAM files are stored using the bgzf format. We must first create a C++ extension wrapping the methods related to bgzf. This process is nicely described in "Writing Node.js Native Extensions". Here, a BGZF* pointer is wrapped into a class BGZFSupport that extends node::ObjectWrap:
class BGZFSupport: public ObjectWrap
 {
 private:
   BGZF* file;
 public:
    (...)
   // Closes the wrapped BGZF handle, if any, and returns the value reported
   // by bgzf_close(); returns 0 when there is no open handle.
   int close()
    {
    if(file==NULL) return 0;
    const int status=::bgzf_close(file);
    file=NULL; // forget the handle so it cannot be closed a second time
    return status;
    }
   // Releases the BGZF handle on destruction unless it has already been
   // cleared (file is NULL).
   ~BGZFSupport()
    {
    if(file==NULL) return;
    ::bgzf_close(file);
    }
 (...)
The javascript constructor for BGZFSupport opens the bgzfile and is implemented on the C++ side as:
  // JavaScript-side constructor: new bgzf(filename, mode).
  // Validates that two string arguments were given, opens the file with
  // bgzf_open(), wraps the resulting BGZF* in a new BGZFSupport bound to
  // args.This(), and returns args.This().
  // NOTE(review): RETURN_THROW is defined outside this snippet; it presumably
  // throws a JavaScript exception with the given message and returns from the
  // function -- confirm against its definition.
  static Handle<Value> New(const Arguments& args)
    {
    HandleScope scope;
    // Argument validation: exactly the first two arguments are inspected.
    if (args.Length() < 2)
      {
      RETURN_THROW("Expected two parameters for bgfz");
      }
    if(!args[0]->IsString())
     {
     RETURN_THROW("1st argument is not a string");
     }
    if(!args[1]->IsString())
     {
     RETURN_THROW("2nd argument is not a string");
     }
    
    // Convert the V8 strings to UTF-8 so they can be passed to the C API.
    v8::String::Utf8Value filename(args[0]);
    v8::String::Utf8Value mode(args[1]);
    BGZF* file= ::bgzf_open(ToCString(filename),ToCString(mode));
    if(file==NULL)
     {
     RETURN_THROW("Cannot open \"" << ToCString(filename) <<  "\"");
     }
    // Hand the open handle to a BGZFSupport and attach it to the JS object.
    BGZFSupport* instance = new BGZFSupport(file);
    instance->Wrap(args.This());
    return args.This();
    }
... and so on for the other functions...

Implementing the javascript-based BAM-Reader

Next, we can embed this BGZFSupport in a javascript file that will read a BAM file:
var bgzf=require("bgzf");
and we create a javascript class/function BamReader that will open the file as bgzf and will read the BAM header:
var bgzf=require("bgzf");
var Buffer = require('buffer').Buffer;


/**
 * Opens a BAM file through the bgzf extension and parses its header.
 *
 * After construction the instance exposes:
 *   - fd:         the open bgzf handle, positioned just after the header
 *   - text:       the header text (l_text bytes decoded as UTF-8)
 *   - references: array of {name, l_ref} objects, in file order
 *   - name2seq:   the same reference objects, indexed by name
 *
 * @param {string} path - path of the BAM file to open
 * @throws {Error} if the magic bytes are wrong, a length field is invalid,
 *                 or the file ends before the header is complete
 */
function BamReader(path)
 {
 var fd=new bgzf.bgzf(path,"r");
 this.fd=fd;

 /* Allocates a zero-filled buffer; falls back to the legacy constructor on
    runtimes that predate Buffer.alloc. */
 function allocate(size)
  {
  return (typeof Buffer.alloc==="function") ? Buffer.alloc(size) : new Buffer(size);
  }

 /* Reads exactly `size` bytes or throws an Error carrying `message`. */
 function readBytes(size,message)
  {
  var buf=allocate(size);
  if(fd.read(buf,0,size)!==size) throw new Error(message);
  return buf;
  }

 /* Reads one little-endian signed 32-bit integer. */
 function readInt32()
  {
  return readBytes(4,"Cannot read 4 bytes").readInt32LE(0);
  }

 /* magic: the four bytes 'B','A','M',1. Compared as numbers: the former
    "\1" octal escape is a SyntaxError in strict mode and ES modules. */
 var magic=readBytes(4,"Cannot read 4 bytes");
 if(magic[0]!==66)  throw new Error("Error MAGIC[0]");
 if(magic[1]!==65)  throw new Error("Error MAGIC[1] got"+magic[1]);
 if(magic[2]!==77)  throw new Error("Error MAGIC[2]");
 if(magic[3]!==1)  throw new Error("Error MAGIC[3]");

 /* l_text */
 var l_text=readInt32();
 /* a negative length can only come from a corrupt file */
 if(l_text<0) throw new Error("Invalid l_text: "+l_text);
 this.text=readBytes(l_text,"Cannot read "+l_text+" bytes (l_text)").toString('utf-8', 0, l_text);

 /* n_seq */
 var n_ref=readInt32();
 if(n_ref<0) throw new Error("Invalid n_ref: "+n_ref);
 this.references=[];
 this.name2seq={};
 for(var i=0;i< n_ref;++i)
  {
  var refseq={};
  /* l_name: counts the trailing \0, so it must be at least 1 */
  var l_name=readInt32();
  if(l_name<1) throw new Error("Invalid l_name: "+l_name);
  /* name */
  var nameBytes=readBytes(l_name,"Cannot read "+l_name+" bytes (name)");
  refseq.name=nameBytes.toString('utf-8', 0,l_name-1);//\0 terminated
  /* l_ref */
  refseq.l_ref=readInt32();
  this.references.push(refseq);
  this.name2seq[refseq.name]=refseq;
  }
 }
Another function next() reads the next alignment or returns null ( see the code ).

Testing

$ export NODE_PATH=/path/to/bionode/build

the script reads a simple BAM file and prints the positions of the reads:
(...)

/* Prints reference name, read name and position for every alignment. */
var r= new BamReader("/path/to/samtools-0.1.17/examples/toy.bam");
try
 {
 var align;
 while((align=r.next())!=null)
  {
  /* refID may not index a reference (the BAM format uses -1 for reads
     without one); print "*" instead of failing on an undefined entry. */
  var ref=r.references[align.refID];
  console.log(
   (ref ? ref.name : "*")+"\t"+
   align.read_name+"\t"+
   align.pos
   );
  }
 }
finally
 {
 /* always release the file, even if next() or the loop body throws */
 r.close();
 }

Result

$ node bgzf.js
ref r001 6
ref r002 8
ref r003 8
ref r004 15
ref r003 28
ref r001 36
ref2 x1 0
ref2 x2 1
ref2 x3 5
ref2 x4 9
ref2 x5 11
ref2 x6 13


Remaining questions:

At the moment, I don't know how to correctly package the C++ and javascript files for node.js, how to correctly include the files, how to group the different files under a common 'namespace', etc...

That's It,
Pierre

31 December 2010

Translating a DNA to a Protein using server-side javascript and C: my notebook

In my previous post, I used Node.js to translate a DNA to a protein on the server side, using javascript. In the following post, I will again translate a DNA, but this time by calling a specialized C program on the server side.

Source code


The C program

The C program reads a DNA string from stdin and translates it using the standard genetic code:
Compilation:
gcc -o /my/bin/path/translate translate.c

The Node.js script

When the Node.js server receives a DNA parameter, it spawns a new process running the C program and we write the DNA to this process via 'stdin'.
Each time a new 'data' event (containing the protein) is received, it is printed to the http response. At the end of the process, we close the stream by calling 'end()'.

test

> node-v0.2.5/node translate.js
Server running at http://127.0.0.1:8080

> curl -s "http://localhost:8080/?dna=ATGATGATAGATAGATATAGTAGATATGATCGTCAGCCATACG"
MMIDRYSRYDRQPY


That's it,

Pierre

Server-side javascript: translating a DNA with Node.js

(wikipedia) Node.js is an evented I/O framework for the V8 JavaScript engine on Unix-like platforms. It is intended for writing scalable (javascript-based) network programs such as web servers.

In the following post I will create a javascript server translating a DNA to a protein.

Installing Node.js

I've downloaded the sources for Node.js from http://nodejs.org/#download. It compiled (configure+make) and ran without any problem.

The script

The following script contains a class handling a GeneticCode and the server TranslateDna translating the DNA to a protein. It handles both the POST and the GET http methods. If no parameter is found it displays a simple HTML form, else the form data are decoded and the DNA is translated. The protein is returned as a JSON structure.

Running the server

> node-v0.2.5/node translate.js
Server running at http://127.0.0.1:8080

Test


> curl "http://localhost:8080/"
<html><body><form action="/" method="GET"><h1>DNA</h1><textarea name="dna"></textarea><br/><input type="submit" value="Submit"></form></body></html>

> curl "http://localhost:8080/?dna=ATGAACTATCGATGCTACGACTGATCG"
{"protein":"MNYRCYD*S","query":"ATGAACTATCGATGCTACGACTGATCG"}



That's it,

Pierre