SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
seqan3::dna4 Class Reference

The four letter DNA alphabet of A,C,G,T. More...

#include <seqan3/alphabet/nucleotide/dna4.hpp>

+ Inheritance diagram for seqan3::dna4:

Public Member Functions

Constructors, destructor and assignment
constexpr dna4 () noexcept=default
 Defaulted.
 
constexpr dna4 (dna4 const &) noexcept=default
 Defaulted.
 
constexpr dna4 (dna4 &&) noexcept=default
 Defaulted.
 
constexpr dna4 & operator= (dna4 const &) noexcept=default
 Defaulted.
 
constexpr dna4 & operator= (dna4 &&) noexcept=default
 Defaulted.
 
 ~dna4 () noexcept=default
 Defaulted.
 
template<std::same_as< rna4 > t>
constexpr dna4 (t const &r) noexcept
 Allow implicit construction from dna/rna of the same size.
 
- Public Member Functions inherited from seqan3::nucleotide_base< dna4, 4 >
constexpr dna4 complement () const noexcept
 Return the complement of the letter. More...
 
constexpr nucleotide_base (other_nucl_type const &other) noexcept
 Allow explicit construction from any other nucleotide type and convert via the character representation. More...
 
- Public Member Functions inherited from seqan3::alphabet_base< derived_type, size, char_t >
constexpr alphabet_base () noexcept=default
 Defaulted.
 
constexpr alphabet_base (alphabet_base const &) noexcept=default
 Defaulted.
 
constexpr alphabet_base (alphabet_base &&) noexcept=default
 Defaulted.
 
constexpr alphabet_base & operator= (alphabet_base const &) noexcept=default
 Defaulted.
 
constexpr alphabet_base & operator= (alphabet_base &&) noexcept=default
 Defaulted.
 
 ~alphabet_base () noexcept=default
 Defaulted.
 
constexpr char_type to_char () const noexcept
 Return the letter as a character of char_type. More...
 
constexpr rank_type to_rank () const noexcept
 Return the letter's numeric value (rank in the alphabet). More...
 
constexpr derived_type & assign_char (char_type const chr) noexcept
 Assign from a character, implicitly converts invalid characters. More...
 
constexpr derived_type & assign_rank (rank_type const c) noexcept
 Assign from a numeric value. More...
 

Private Types

using base_t = nucleotide_base< dna4, 4 >
 The base class.
 

Static Private Member Functions

static constexpr rank_type char_to_rank (char_type const chr)
 Returns the rank representation of character. More...
 
static constexpr char_type rank_to_char (rank_type const rank)
 Returns the character representation of rank. More...
 

Private Attributes

friend base_t
 Befriend seqan3::nucleotide_base.
 
friend rna4
 Befriend seqan3::rna4 so it can copy char_to_rank.
 

Static Private Attributes

static constexpr std::array< rank_type, 256 > char_to_rank_table
 The lookup table used in char_to_rank. More...
 
static const std::array< dna4, alphabet_size > complement_table
 The complement table. More...
 
static constexpr char_type rank_to_char_table [alphabet_size]
 The lookup table used in rank_to_char. More...
 

Related Functions

(Note that these are not member functions.)

using dna4_vector = std::vector< dna4 >
 Alias for an std::vector of seqan3::dna4. More...
 
Literals
constexpr dna4 operator""_dna4 (char const c) noexcept
 The seqan3::dna4 char literal. More...
 
dna4_vector operator""_dna4 (char const *s, std::size_t n)
 The seqan3::dna4 string literal. More...
 

Additional Inherited Members

- Static Public Member Functions inherited from seqan3::nucleotide_base< dna4, 4 >
static constexpr bool char_is_valid (char_type const c) noexcept
 Validate whether a character value has a one-to-one mapping to an alphabet value. More...
 
- Static Public Attributes inherited from seqan3::alphabet_base< derived_type, size, char_t >
static constexpr detail::min_viable_uint_t< size > alphabet_size = size
 The size of the alphabet, i.e. the number of different values it can take. More...
 
- Protected Types inherited from seqan3::alphabet_base< derived_type, size, char_t >
using char_type = std::conditional_t< std::same_as< char_t, void >, char, char_t >
 The char representation; conditional needed to make semi alphabet definitions legal. More...
 
using rank_type = detail::min_viable_uint_t< size - 1 >
 The type of the alphabet when represented as a number (e.g. via to_rank()). More...
 

Detailed Description

The four letter DNA alphabet of A,C,G,T.

Note that you can assign 'U' as a character to dna4 and it will silently be converted to 'T'.

Like most alphabets, this alphabet cannot be initialised directly from its character representation. Instead initialise/assign from the character literal or use the function seqan3::dna4::assign_char().

int main()
{
using namespace seqan3::literals;
seqan3::dna4 letter{'C'_dna4};
letter.assign_char('F'); // Characters other than IUPAC characters are implicitly converted to A.
seqan3::debug_stream << letter << '\n'; // prints "A"
// IUPAC characters are implicitly converted to their best fitting representative
seqan3::debug_stream << letter.assign_char('R') << '\n'; // prints "A"
seqan3::debug_stream << letter.assign_char('Y') << '\n'; // prints "C"
seqan3::debug_stream << letter.assign_char('S') << '\n'; // prints "C"
seqan3::debug_stream << letter.assign_char('W') << '\n'; // prints "A"
seqan3::debug_stream << letter.assign_char('K') << '\n'; // prints "G"
seqan3::debug_stream << letter.assign_char('M') << '\n'; // prints "A"
seqan3::debug_stream << letter.assign_char('B') << '\n'; // prints "C"
seqan3::debug_stream << letter.assign_char('D') << '\n'; // prints "A"
seqan3::debug_stream << letter.assign_char('H') << '\n'; // prints "A"
seqan3::debug_stream << letter.assign_char('V') << '\n'; // prints "A"
letter.assign_char('a'); // Lower case letters are the same as their upper case equivalent.
seqan3::debug_stream << letter << '\n'; // prints "A"
}
constexpr derived_type & assign_char(char_type const chr) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: alphabet_base.hpp:211
The four letter DNA alphabet of A,C,G,T.
Definition: dna4.hpp:53
Provides seqan3::debug_stream and related types.
Provides seqan3::dna4, container aliases and string literals.
debug_stream_type debug_stream
A global instance of seqan3::debug_stream_type.
Definition: debug_stream.hpp:42

If the special char conversion of IUPAC characters is not your desired behaviour, refer to our cookbook for an example of a custom dna4 alphabet that converts all unknown characters to A; it shows how to change the conversion behaviour.

This entity is stable. Since version 3.1.

Member Function Documentation

◆ char_to_rank()

static constexpr rank_type seqan3::dna4::char_to_rank ( char_type const  chr)
inline static constexpr private

Returns the rank representation of character.

This function is required by seqan3::alphabet_base.

◆ rank_to_char()

static constexpr char_type seqan3::dna4::rank_to_char ( rank_type const  rank)
inline static constexpr private

Returns the character representation of rank.

This function is required by seqan3::alphabet_base.

Friends And Related Function Documentation

◆ dna4_vector

using dna4_vector = std::vector<dna4>
related

Alias for an std::vector of seqan3::dna4.

This entity is stable. Since version 3.1.

◆ operator""_dna4() [1/2]

dna4_vector operator""_dna4 ( char const *  s,
std::size_t  n 
)
related

The seqan3::dna4 string literal.

Returns
seqan3::dna4_vector

You can use this string literal to easily assign to dna4_vector:

int main()
{
using namespace seqan3::literals;
seqan3::dna4_vector sequence1{"ACGTTA"_dna4};
seqan3::dna4_vector sequence2 = "ACGTTA"_dna4;
auto sequence3 = "ACGTTA"_dna4;
}

This entity is stable. Since version 3.1.

◆ operator""_dna4() [2/2]

constexpr dna4 operator""_dna4 ( char const  c)
related

The seqan3::dna4 char literal.

Returns
seqan3::dna4

You can use this char literal to assign a seqan3::dna4 character:

int main()
{
using namespace seqan3::literals;
seqan3::dna4 letter1{'A'_dna4};
auto letter2 = 'A'_dna4;
}

This entity is stable. Since version 3.1.

Member Data Documentation

◆ char_to_rank_table

constexpr std::array<rank_type, 256> seqan3::dna4::char_to_rank_table
static constexpr private
Initial value:
{
[] () constexpr
{
std::array<rank_type, 256> ret{};
for (size_t rnk = 0u; rnk < alphabet_size; ++rnk)
{
ret[rank_to_char_table[rnk]] = rnk;
ret[to_lower(rank_to_char_table[rnk])] = rnk;
}
ret['U'] = ret['T']; ret['u'] = ret['t'];
ret['R'] = ret['A']; ret['r'] = ret['A'];
ret['Y'] = ret['C']; ret['y'] = ret['C'];
ret['S'] = ret['C']; ret['s'] = ret['C'];
ret['W'] = ret['A']; ret['w'] = ret['A'];
ret['K'] = ret['G']; ret['k'] = ret['G'];
ret['M'] = ret['A']; ret['m'] = ret['A'];
ret['B'] = ret['C']; ret['b'] = ret['C'];
ret['D'] = ret['A']; ret['d'] = ret['A'];
ret['H'] = ret['A']; ret['h'] = ret['A'];
ret['V'] = ret['A']; ret['v'] = ret['A'];
return ret;
}()
}
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:276
static constexpr char_type rank_to_char_table[alphabet_size]
The lookup table used in rank_to_char.
Definition: dna4.hpp:112
constexpr char_type to_lower(char_type const c) noexcept
Converts 'A'-'Z' to 'a'-'z' respectively; other characters are returned as is.
Definition: transform.hpp:81

The lookup table used in char_to_rank.

We would have defined these lookup tables directly within their respective constexpr functions, but at the time of writing this, gcc did not (clang >= 4 did!) auto-generate lookup tables.

static constexpr char_type rank_to_char(rank_type const rank)
{
// not possible because of static not being allowed within a constexpr function
static constexpr lookup_table = ...;
return lookup_table[rank];
}
static constexpr char_type rank_to_char(rank_type const rank)
{
// up-to the compiler to optimise, no guarantee that a lookup table is used.
constexpr lookup_table = ...;
return lookup_table[rank];
}
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:104
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:96
rank_type rank
The value of the alphabet letter is stored as the rank.
Definition: alphabet_base.hpp:338
static constexpr char_type rank_to_char(rank_type const rank)
Returns the character representation of rank.
Definition: dna4.hpp:161
See also
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99320 for the progress on gcc

◆ complement_table

constexpr std::array< dna4, dna4::alphabet_size > seqan3::dna4::complement_table
static constexpr private
Initial value:
{
'T'_dna4,
'G'_dna4,
'C'_dna4,
'A'_dna4
}

The complement table.

◆ rank_to_char_table

constexpr char_type seqan3::dna4::rank_to_char_table[alphabet_size]
static constexpr private
Initial value:
{
'A',
'C',
'G',
'T'
}

The lookup table used in rank_to_char.

We would have defined these lookup tables directly within their respective constexpr functions, but at the time of writing this, gcc did not (clang >= 4 did!) auto-generate lookup tables.

static constexpr char_type rank_to_char(rank_type const rank)
{
// not possible because of static not being allowed within a constexpr function
static constexpr lookup_table = ...;
return lookup_table[rank];
}
static constexpr char_type rank_to_char(rank_type const rank)
{
// up-to the compiler to optimise, no guarantee that a lookup table is used.
constexpr lookup_table = ...;
return lookup_table[rank];
}
See also
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99320 for the progress on gcc

The documentation for this class was generated from the following file: