...
Run Format

Source file test/bench/go1/fasta_test.go

Documentation: test/bench/go1

  // Copyright 2011 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  package go1
  
  import "runtime"
  
  // Not a benchmark; input for revcomp.
  
  var fastabytes = makefasta()
  
  func makefasta() []byte {
  	var n int = 25e6
  	if runtime.GOARCH == "arm" || runtime.GOARCH == "mips" || runtime.GOARCH == "mips64" {
  		// TODO(dfc) remove this limitation after precise gc.
  		// A value of 25e6 consumes 465mb of heap on 32bit
  		// platforms, which is too much for some systems.
  		// A value of 25e5 produces a memory layout that
  		// confuses the gc on 32bit platforms. So 25e4 it is.
  		n = 25e4
  	}
  	return fasta(n)
  }
  
  func fasta(n int) []byte {
  	out := make(fastaBuffer, 0, 11*n)
  
  	iub := []fastaAcid{
  		{prob: 0.27, sym: 'a'},
  		{prob: 0.12, sym: 'c'},
  		{prob: 0.12, sym: 'g'},
  		{prob: 0.27, sym: 't'},
  		{prob: 0.02, sym: 'B'},
  		{prob: 0.02, sym: 'D'},
  		{prob: 0.02, sym: 'H'},
  		{prob: 0.02, sym: 'K'},
  		{prob: 0.02, sym: 'M'},
  		{prob: 0.02, sym: 'N'},
  		{prob: 0.02, sym: 'R'},
  		{prob: 0.02, sym: 'S'},
  		{prob: 0.02, sym: 'V'},
  		{prob: 0.02, sym: 'W'},
  		{prob: 0.02, sym: 'Y'},
  	}
  
  	homosapiens := []fastaAcid{
  		{prob: 0.3029549426680, sym: 'a'},
  		{prob: 0.1979883004921, sym: 'c'},
  		{prob: 0.1975473066391, sym: 'g'},
  		{prob: 0.3015094502008, sym: 't'},
  	}
  
  	alu := []byte(
  		"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
  			"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
  			"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
  			"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
  			"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
  			"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
  			"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
  
  	out.WriteString(">ONE Homo sapiens alu\n")
  	fastaRepeat(&out, alu, 2*n)
  	out.WriteString(">TWO IUB ambiguity codes\n")
  	fastaRandom(&out, iub, 3*n)
  	out.WriteString(">THREE Homo sapiens frequency\n")
  	fastaRandom(&out, homosapiens, 5*n)
  	return out
  }
  
  type fastaBuffer []byte
  
  func (b *fastaBuffer) Flush() {
  	panic("flush")
  }
  
  func (b *fastaBuffer) WriteString(s string) {
  	p := b.NextWrite(len(s))
  	copy(p, s)
  }
  
  func (b *fastaBuffer) NextWrite(n int) []byte {
  	p := *b
  	if len(p)+n > cap(p) {
  		b.Flush()
  		p = *b
  	}
  	out := p[len(p) : len(p)+n]
  	*b = p[:len(p)+n]
  	return out
  }
  
  const fastaLine = 60
  
  func fastaRepeat(out *fastaBuffer, alu []byte, n int) {
  	buf := append(alu, alu...)
  	off := 0
  	for n > 0 {
  		m := n
  		if m > fastaLine {
  			m = fastaLine
  		}
  		buf1 := out.NextWrite(m + 1)
  		copy(buf1, buf[off:])
  		buf1[m] = '\n'
  		if off += m; off >= len(alu) {
  			off -= len(alu)
  		}
  		n -= m
  	}
  }
  
  const (
  	fastaLookupSize          = 4096
  	fastaLookupScale float64 = fastaLookupSize - 1
  )
  
  var fastaRand uint32 = 42
  
  type fastaAcid struct {
  	sym   byte
  	prob  float64
  	cprob float64
  	next  *fastaAcid
  }
  
  func fastaComputeLookup(acid []fastaAcid) *[fastaLookupSize]*fastaAcid {
  	var lookup [fastaLookupSize]*fastaAcid
  	var p float64
  	for i := range acid {
  		p += acid[i].prob
  		acid[i].cprob = p * fastaLookupScale
  		if i > 0 {
  			acid[i-1].next = &acid[i]
  		}
  	}
  	acid[len(acid)-1].cprob = 1.0 * fastaLookupScale
  
  	j := 0
  	for i := range lookup {
  		for acid[j].cprob < float64(i) {
  			j++
  		}
  		lookup[i] = &acid[j]
  	}
  
  	return &lookup
  }
  
  func fastaRandom(out *fastaBuffer, acid []fastaAcid, n int) {
  	const (
  		IM = 139968
  		IA = 3877
  		IC = 29573
  	)
  	lookup := fastaComputeLookup(acid)
  	for n > 0 {
  		m := n
  		if m > fastaLine {
  			m = fastaLine
  		}
  		buf := out.NextWrite(m + 1)
  		f := fastaLookupScale / IM
  		myrand := fastaRand
  		for i := 0; i < m; i++ {
  			myrand = (myrand*IA + IC) % IM
  			r := float64(int(myrand)) * f
  			a := lookup[int(r)]
  			for a.cprob < r {
  				a = a.next
  			}
  			buf[i] = a.sym
  		}
  		fastaRand = myrand
  		buf[m] = '\n'
  		n -= m
  	}
  }
  

View as plain text