2. GCTA BESD file#

The BESD file is a binary file format used in GCTA, OSCA, and SMR to store GWAS analysis results. It must be accompanied by the .epi and .esi files when used.

Epi file#

The Epi file is a plaintext file where every line represents an entry of information for a phenotype. Therefore, the number of lines in the .epi file is equal to the number of phenotypes.

FLML description#

def filetype plaintext;
def encode ascii;
export line_num;
export line_order;

[+:line_num]{
    [1]<string>(name="chromosome number")
    [1]<string>(name="probe ID"; dsp="identifier of the probe (phenotype) described by this line")
    [1]<string>(name="genetic distance"; datatype=float)
    [1]<string>(name="base position"; datatype=int; dsp="physical position of the probe in base pairs")
    [1]<string>(name="gene ID")
    [1]<string>(name="orientation"; choices={"+", "-"})
}(element_end="\n"; element_sub_sep_file_scope={"\s", "\t"}; order=line_order)

Esi file#

The Esi file is used to record information of variants.

FLML description#

def filetype plaintext;
def encode ascii;
export line_num;
export line_order;
export file_data;

file_data = [];
[+:line_num]{
    [1]<string; :chrom>(name="chromosome")
    [1]<string; :rsid>(name="rsid")
    [1]<string; :pos>(name="genetic distance"; datatype=float)
    [1]<string; :base_pos>(name="base position"; datatype=uint; dsp="physical position of the variant in base pairs")
    [1]<string; :a1>(name="reference allele")
    [1]<string; :a2>(name="alternative allele")
    [1]<string; :freq>(name="minor allele frequency"; datatype=float)
    file_data.append([chrom, rsid, pos, base_pos, a1, a2, freq]);
}(element_end="\n"; element_sub_sep_file_scope={"\s", "\t"}; order=line_order)

Besd file#

There are two kinds of BESD file format: the dense type and the sparse type.

Macros#

The C macros below define the format markers used by the BESD file format.

  • Dense

// Format markers for the dense BESD layouts.
#define DENSE_FULL 0
#define DENSE_BELT 1
// 0x00000004: RESERVEDUNITS ints followed by floats; the <beta, se> pair of
// each SNP is stored adjacently, across all the probes.
#define OSCA_DENSE_1 4
// 0x00000000 followed by floats; for each probe, the beta values across all
// the SNPs are adjacent and are followed by the se values.
// NOTE(review): has the same numeric value (0) as DENSE_FULL -- confirm the
// two are never tested against the same field.
#define SMR_DENSE_1 0
// RESERVEDUNITS ints followed by floats. Layout: indicator, sample size,
// SNP number, probe number, twelve -9 placeholders, then the values.
// [SMR default and OSCA default]
#define SMR_DENSE_3 5
  • Sparse

// Format markers for the sparse BESD layouts.
#define SPARSE_FULL 2
#define SPARSE_BELT 3
// 0x00000001: RESERVEDUNITS ints + uint64_t + uint64_ts + uint32_ts + floats:
// the value number, followed by half of the uint64_ts and half of the
// uint32_ts that SMR_SPARSE_3 stores. [OSCA default]
#define OSCA_SPARSE_1 1
// 0x40400000: uint32_t + uint64_t + uint64_ts + uint32_ts + floats.
#define SMR_SPARSE_3F 0x40400000
// RESERVEDUNITS ints + uint64_t + uint64_ts + uint32_ts + floats. Layout:
// indicator, sample size, SNP number, probe number, 6 * -9 placeholders,
// value number, cols, row ids, then the beta/se values. [SMR default]
// NOTE(review): the format description in this document reads twelve reserved
// int32 values for both layouts -- confirm whether "6" here is stale.
#define SMR_SPARSE_3 3

Sparse and Dense file format#

def filetype binary;
def endianness little;
import epi_format;
import esi_format;
require epi_file;
require esi_file;

[1]<int32; :file_format>(name="format marker"; choices={3, 5}; dsp="3 selects the sparse layout, 5 selects the dense layout")
[1]<int32>(name="sample number"; NA=-9)
[1]<uint32; :esi_len>(name="number of variants")
assert esi_len == esi_format.line_num;
[1]<uint32; :epi_len>(name="number of probes")
assert epi_len == epi_format.line_num;
[12]<int32; []=-9>(name="reserved units"; dsp="twelve placeholder values, each set to -9")

if (file_format == 5){
    [epi_len] {
        [esi_len]<float>(name="beta values of one probe"; align_with=esi_format.line_order)
        [esi_len]<float>(name="se values of one probe"; align_with=esi_format.line_order)
    }(name="probe data"; align_with=epi_format.line_order)
}

if (file_format == 3){

    [1]<uint64; :value_num>(name="the number of values"; dsp="include beta and se values")
    [1]<uint64; =0>(name="first offset")

    beta_value_s = [];
    se_value_s = [];
    last_offset = 0;
    [epi_len]{
        [1]<uint64; :beta_offset>(name="total offset of beta data"; dsp="cumulative end offset; the count for this probe is the difference from the previous offset")
        [1]<uint64; :se_offset>(name="total offset of se data"; dsp="cumulative end offset; the count for this probe is the difference from the beta offset")
        beta_num = beta_offset - last_offset;
        last_offset = beta_offset;
        se_num = se_offset - last_offset;
        last_offset = se_offset;
        assert beta_num == se_num;
        beta_value_s.append(beta_num);
        se_value_s.append(se_num);
    }(name="data offset of probes"; align_with=epi_format.line_order)

    orders = [];
    for (i = 0; i < epi_len; i++) {
        [beta_value_s[i]]<uint32; :beta_idx>(name="index for esi of beta"; correspond=esi_format.file_data[beta_idx]; order=beta_order)
        [se_value_s[i]]<uint32; :se_idx>(name="index for esi of se"; align_with=esi_format.file_data[se_idx])
        orders.append(beta_order);
    }

    for (i = 0; i < epi_len; i++) {
        [beta_value_s[i]]<float>(name="beta values"; align_with=orders[i])
        [se_value_s[i]]<float>(name="se values"; align_with=orders[i])
    }
}