/*
Copyright (c) 2015, Sung Hoon Baek (shun.baek@gmail.com)
http://core.jwu.ac.kr/me
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Sung Hoon Baek nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL SUNG HOON BAEK BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* RAID Level 6 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include "lore.h"
#include "sc.h"
#include "mio.h"
#include "mio_page.h"
#include "syndrome.h"
/* Linkage marker placed on the file-local definitions below. */
#define STATIC static
/* Every gen_syndrome(...) call in this file expands to r6_gen_syndrome(...). */
#define gen_syndrome r6_gen_syndrome
/* Prototype only; the calls visible in this file go through the gen_syndrome alias above. */
void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs);
/* RAID-6 private state, stored in mio->raid_priv by mio_r6_init(). */
struct mio_r6_priv {
/* First entry reported by lore_get_failed_disks(); treated as "failed" when >= 0. */
int phys_failed_disk1;
/* Second entry reported by lore_get_failed_disks(); treated as "failed" when >= 0. */
int phys_failed_disk2;
/* Return value of lore_get_failed_disks(). */
int num_faults;
/* Initialised to 1; taken in mio_r6_scrub() to serialise use of the two scrub pages. */
struct semaphore scrub_mutex;
/* One page each, used as the P and Q outputs when syndromes are recomputed for scrubbing. */
uint8_t *p_scrub_page_p, *p_scrub_page_q;
};
/* Prototype of the read-modify-write P/Q update routine called from
 * mio_r6_recov_RMW_PQ() with arrays of old and new page pointers. */
void r6_read_modify_write(int disks, size_t bytes,
void **old_ptrs, void **new_ptrs);
/* In .bss so it's zeroed */
/* Substituted for the missing data pages before regenerating syndromes in the
 * __mio_r6_recov_DP()/__mio_r6_recov_DD() helpers. */
static char raid6_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
/*
 * Tear down the RAID-6 private state attached to @mio: when a private
 * structure is present, release both scrub pages and the structure itself;
 * in every case clear mio->raid_priv afterwards.
 */
STATIC void mio_r6_cleanup(struct mio *mio)
{
	struct mio_r6_priv *r6 = (struct mio_r6_priv *)mio->raid_priv;

	if (r6 != NULL) {
		unsigned long p_addr = (unsigned long)r6->p_scrub_page_p;
		unsigned long q_addr = (unsigned long)r6->p_scrub_page_q;

		free_page(p_addr);
		free_page(q_addr);
		vfree(r6);
	}
	mio->raid_priv = NULL;
}
/*
 * Initialise the RAID-6 layer for @mio.
 *
 * Derives the geometry (D = N - 2 data disks, total and data page counts per
 * stripe cache), allocates the private state with its two scrub pages and
 * records the currently failed disks.
 *
 * Returns 0 on success or -ENOMEM when an allocation fails.  On failure
 * nothing stays allocated and mio->raid_priv is NULL, the same state the
 * original left behind when vmalloc() failed.
 */
STATIC int mio_r6_init(struct mio *mio)
{
	/* Pre-set to "no disk" in case the helper fills fewer than two slots
	 * (its exact contract is not visible from this file). */
	int failed_disks[2] = { -1, -1 };
	struct mio_r6_priv *priv;

	mio->D = mio->N - 2;
	mio->num_pages = mio->m * mio->N; /* data pages + parity pages + Q pages */
	mio->num_data_pages = mio->m * mio->D;

	/* raid6_zero_page and the scrub pages are PAGE_SIZE long, so the
	 * configured page size must match; checked before anything is allocated. */
	BUG_ON(PAGE_SIZE != mio->page_size);

	priv = (struct mio_r6_priv *)vmalloc(sizeof(*priv));
	mio->raid_priv = priv;
	if (priv == NULL)
		return -ENOMEM;

	priv->num_faults = lore_get_failed_disks(mio->lore, failed_disks, 2);
	priv->phys_failed_disk1 = failed_disks[0];
	priv->phys_failed_disk2 = failed_disks[1];
	sema_init(&priv->scrub_mutex, 1);

	priv->p_scrub_page_p = (uint8_t *)__get_free_page(GFP_KERNEL);
	priv->p_scrub_page_q = (uint8_t *)__get_free_page(GFP_KERNEL);
	if (priv->p_scrub_page_p == NULL || priv->p_scrub_page_q == NULL) {
		/* Report the failure instead of crashing; free_page() ignores a
		 * zero address, so whichever page was obtained is released. */
		free_page((unsigned long)priv->p_scrub_page_p);
		free_page((unsigned long)priv->p_scrub_page_q);
		vfree(priv);
		mio->raid_priv = NULL;
		return -ENOMEM;
	}
	return 0;
}
/*
logical data placement: before rotating parity strips
0 1 2 3 4
S1 S2 S3 P Q
S4 S5 S6 P Q
S7 S8 S9 P Q
S10 S11 S12 P Q
S13 S14 S15 P Q
S?: strip
P: parity strip
Q: the second syndrome strip
the strip comprises contiguous blocks.
S1, S2, S3, P and Q together constitute a stripe.
the strip is not the stripe.
physical data placement of RAID6: after rotating parity strips
0 1 2 3 4
S1 S2 S3 P Q
S4 S5 P Q S6
S7 P Q S8 S9
P Q S10 S11 S12
Q S13 S14 S15 P
*/
// Layout helpers.  `mio` must point to an object with integer members N
// (number of disks) and m (pages per strip).  Every argument is fully
// parenthesised so that expressions such as `a | b` can be passed safely.

// the physical parity disk for (sc_pos, *)
#define mio_r6_P_disk(mio, sc_pos) ((mio)->N - 1 - (((sc_pos) + 1) % (mio)->N))
// the physical Q disk for (sc_pos, *)
#define mio_r6_Q_disk(mio, sc_pos) ((mio)->N - 1 - ((sc_pos) % (mio)->N))
// logical disk for (*, b_pos)
#define mio_r6_logical_data_disk(mio, b_pos) ((b_pos) / (mio)->m)
/*
 * Physical disk holding the data block at (sc_pos, b_pos).
 *
 * The logical data disk is shifted past the rotating P/Q pair:
 *   - P on the last disk (Q is then on disk 0): every data disk moves up by 1;
 *   - otherwise data disks at or beyond P's position move up by 2;
 *   - data disks before P keep their number.
 */
STATIC int mio_r6_physical_data_disk(struct mio *mio, stripe_t sc_pos, unsigned b_pos)
{
	int parity_disk = mio_r6_P_disk(mio, sc_pos);
	int logical = mio_r6_logical_data_disk(mio, b_pos);
	int shift;

	if (parity_disk == mio->N-1)
		shift = 1;
	else if (logical >= parity_disk)
		shift = 2;
	else
		shift = 0;

	return logical + shift;
}
/* Function form of the mio_r6_logical_data_disk() macro: the logical data
 * disk that owns block position @b_pos. */
STATIC int mio_r6_logical_data_disk_func(struct mio *mio, unsigned b_pos)
{
	int logical = mio_r6_logical_data_disk(mio, b_pos);

	return logical;
}
/*
 * Translate a logical disk number into the physical disk for stripe @sc_pos.
 *
 * Logical numbering: 0 .. D-1 are data disks, D is P and D+1 is Q.
 * A negative @l_disk is passed through as -1.
 */
STATIC int mio_r6_logi2phys_disk(struct mio *mio, stripe_t sc_pos, int l_disk)
{
	int parity_disk;

	if (l_disk < 0)
		return -1;
	if (l_disk == mio->D)		/* P */
		return mio_r6_P_disk(mio, sc_pos);
	if (l_disk == mio->D+1)		/* Q */
		return mio_r6_Q_disk(mio, sc_pos);

	/* Data disk: skip over the P/Q pair. */
	parity_disk = mio_r6_P_disk(mio, sc_pos);
	if (parity_disk == mio->N-1)
		return l_disk+1;
	if (l_disk < parity_disk)
		return l_disk;
	return l_disk+2;
}
/*
 * Translate a physical disk number into the logical disk for stripe @sc_pos.
 *
 * Returns D for the disk holding P, D+1 for the disk holding Q, otherwise the
 * logical data disk number.  A negative @phys_disk is passed through as -1.
 */
STATIC int mio_r6_phys2logi_disk(struct mio *mio, stripe_t sc_pos, int phys_disk)
{
	int parity_disk, q_disk;

	if (phys_disk < 0)
		return -1;

	parity_disk = mio_r6_P_disk(mio, sc_pos);
	if (phys_disk == parity_disk)
		return mio->D;

	q_disk = mio_r6_Q_disk(mio, sc_pos);
	if (phys_disk == q_disk)
		return mio->D+1;

	/* Disks not above Q are not shifted. */
	if (!(phys_disk > q_disk))
		return phys_disk;

	/* Above Q: only Q precedes us when Q sits on disk 0, otherwise both P and Q do. */
	return (q_disk == 0) ? phys_disk-1 : phys_disk-2;
}
/* Sector offset, within a disk, of the first P/Q page of stripe @sc_pos:
 * (sc_pos * m) pages converted to sectors. */
STATIC sector_t mio_r6_get_PQ_sector(struct mio *mio, stripe_t sc_pos)
{
	sector_t first_page = sc_pos;

	first_page = first_page * mio->m;
	return first_page << mio->page_to_sector_shift;
}
/* Sector offset, within a disk, of the data page at logical position
 * (sc_pos, b_pos): page (sc_pos * m + b_pos % m) converted to sectors. */
STATIC sector_t mio_r6_get_data_sector(struct mio *mio, stripe_t sc_pos, unsigned b_pos)
{
	sector_t stripe = sc_pos;
	sector_t page = stripe*mio->m + (b_pos%mio->m);

	return page << mio->page_to_sector_shift;
}
/* Sector offset for (sc_pos, b_pos): positions at or beyond num_data_pages
 * belong to P/Q and use the P/Q offset, everything else the data offset. */
STATIC sector_t mio_r6_sector_offset(struct mio *mio, stripe_t sc_pos, unsigned b_pos)
{
	if (b_pos < mio->num_data_pages)
		return mio_r6_get_data_sector(mio, sc_pos, b_pos);

	return mio_r6_get_PQ_sector(mio, sc_pos);
}
/*
 * Report the failed disks of stripe @sc_pos as logical disk numbers.
 *
 *  - no failed disk:   *d1 = *d2 = -1
 *  - one failed disk:  *d1 = the failed disk, *d2 = -1
 *  - two failed disks: *d1 and *d2 hold them, with *d1 < *d2
 *
 * Returns the number of faults found.  With MIO_BAD_BLOCK this may exceed 2,
 * in which case only two disks are recorded.
 */
STATIC int mio_r6_l_failed_disks(struct mio *mio, stripe_t sc_pos, int *d1, int *d2)
{
	int num_faults = 0;
	struct mio_r6_priv *priv = (struct mio_r6_priv *)mio->raid_priv;

	*d1 = -1;
	*d2 = -1;
	if (unlikely(priv->phys_failed_disk1 >= 0)) {
		*d1 = mio_r6_phys2logi_disk(mio, sc_pos, priv->phys_failed_disk1);
		num_faults++;
	}
	if (unlikely(priv->phys_failed_disk2 >= 0)) {
		*d2 = mio_r6_phys2logi_disk(mio, sc_pos, priv->phys_failed_disk2);
		num_faults++;
	}
	/* Only the second slot was populated: move it into *d1 so that a single
	 * fault is always reported through *d1, as documented above.  Callers
	 * index arrays with *d1 whenever the return value is positive. */
	if (unlikely(*d1 < 0 && *d2 >= 0)) {
		*d1 = *d2;
		*d2 = -1;
	}
#ifdef MIO_BAD_BLOCK
	if (unlikely(mio->bad_block_count > 0)) {
		int i;

		/* NOTE(review): i is compared with the logical numbers in *d1 and
		 * *d2 and also passed to mio_find_bad_block(); confirm which disk
		 * numbering that helper expects. */
		for (i = 0; i < mio->N; i++) {
			if (*d1 == i || *d2 == i)
				continue;
			if (mio_find_bad_block(mio, i, sc_pos)) {
				if (num_faults == 0)
					*d1 = i;
				else if (num_faults == 1)
					*d2 = i;
				num_faults++;
			}
		}
	}
#endif
	/* Keep the pair ordered. */
	if (unlikely(num_faults >= 2 && *d1 > *d2)) {
		int t = *d1;

		*d1 = *d2;
		*d2 = t;
	}
	/* Both slots name the same disk: count it once. */
	if (unlikely(num_faults == 2 && *d1 == *d2)) {
		num_faults = 1;
		*d2 = -1;
	}
	return num_faults;
}
/* Returns the number of faults mio_r6_l_failed_disks() reports for stripe
 * @sc_pos; the identities of the failed disks are discarded. */
static int mio_r6_full_stripe_read_condition(struct mio *mio, stripe_t sc_pos)
{
	int first_failed, second_failed;
	int faults;

	faults = mio_r6_l_failed_disks(mio, sc_pos, &first_failed, &second_failed);
	return faults;
}
/* check whether read-modify-write is more beneficial than reconstruct-write */
// TODO: even though n_d+2 < n_e, the read-modify-write cycle may not be better.
/*
 * CHOICE(diff) is only meaningful inside mio_r6_rxw_matrix_for_write(): it
 * reads that function's n_d (dirty pages in the row) and n_e (empty pages in
 * the row) and jumps to its read_modify_write or gen_PQ label.
 *
 * The macro expands to a bare braced block, not do { } while (0), so it must
 * be invoked WITHOUT a trailing semicolon when an `else` follows.
 * With SUPPORT_R6_READ_MODIFY_WRITE undefined it always jumps to gen_PQ.
 */
#define SUPPORT_R6_READ_MODIFY_WRITE
#ifdef SUPPORT_R6_READ_MODIFY_WRITE
#define CHOICE(diff) \
{ \
if (n_d+(diff) < n_e) \
goto read_modify_write;\
else \
goto gen_PQ;\
}
#else
#define CHOICE(diff) goto gen_PQ;
#endif
/*<some examples>
* r: MIO_READ
* t: MIO_TREAD
* x: MIO_XOR
* xx: MIO_DOUBLE_XOR
* w: MIO_WRITE
* p: MIO_TEMP_PAGE
* o: MIO_OPERAND
* _: MIO_XOR_DEST
*
*
* recov_op = N/A
* disk: D0 D1 D2 D3 D4 P Q
* cache status: D, E, E, C, D, F, F
* rxw: w, , , , w, ,
*
* disk: D0 D1 D2 D3 D4 P Q
* cache status: D, E, E/F, C, D, , F
* rxw: txxw, , , , txxw, rx_w,
*
* recov_op = MIO_GEN_PQ
* disk: D0 D1 D2 D3 D4 P Q
* cache status: E, E, E, C, D, ,
* rxw: r, r, r, o, w, w, w
*
* recov_op = MIO_RECOV_XOR
* disk: D0 D1 D2 D3 D4 P Q
* cache status: E, E, E, C, D, , F
* rxw: , , , , txxw, rx_w,
*
* recov_op = MIO_RECOV_XOR | MIO_GEN_PQ
* disk: D0 D1 D2 D3 D4 P Q
* cache status: E/F, E, E, C, D, ,
* rxw: _, rx, rx, x, txw, rxw, w
*
* recov_op = MIO_RECOV_DP | MIO_GEN_PQ
* disk: D0 D1 D2 D3 D4 P Q
* cache status: D, E/F, E, C, D, F,
* rxw: tw, p, r, , tw, p, rw
*
* recov_op = MIO_RECOV_DD | MIO_GEN_PQ
* disk: D0 D1 D2 D3 D4 P Q
* cache status: D, E/F, E/F, C, D, ,
* rxw: tw, p, p, , tw, rw, rw
*
* recov_op = MIO_RECOV_DD | MIO_GEN_PQ
* disk: D0 D1 D2 D3 D4 P Q
* cache status: D/F, E/F, E, C, D, ,
* rxw: p, p, , , tw, w, w
*
*
*
*/
/*
 * Build the operation plan for writing back the dirty pages of miou->sc.
 *
 * Fills three arrays of @miou:
 *   rxw_matrix - one operation mask per page; the data page of logical disk d
 *                in row pbg is at index d*m + pbg, P at num_data_pages + pbg
 *                and Q a further m entries on;
 *   rxw_column - per logical disk, the OR of the masks in that disk's column;
 *   recov_op   - per row, the recovery/generation operation to run.
 *
 * A row ("pbg") is the set of pages at the same offset in every strip.  Rows
 * without a dirty page are left zeroed.
 *
 * l_failed_disks[] is indexed by logical disk: 0x01 marks a failed disk and
 * 0x10 is added when mio_writable_ldisk() reports false for it.
 *
 * Returns 0 on success, -1 when more than two faults are reported.
 */
STATIC int mio_r6_rxw_matrix_for_write(struct mio_unit *miou)
{
struct sc *sc = miou->sc;
struct mio *mio = miou->mio;
int pbg,disk,pos;
uint8_t st;
uint8_t *data_rxw_matrix = miou->rxw_matrix;
uint8_t *P_rxw_matrix = miou->rxw_matrix + mio->num_data_pages;
uint8_t *Q_rxw_matrix = P_rxw_matrix + mio->m;
int l_P_disk = mio->D;
int l_Q_disk = mio->D+1;
uint8_t l_failed_disks[LORE_MAX_DISKS];
int fd1, fd2; //failed disk
int num_faults;
int n_d, n_e;
memset(&l_failed_disks, 0, sizeof(l_failed_disks));
if ((num_faults=mio_r6_l_failed_disks(mio,sc->sc_pos, &fd1, &fd2))>0) {
l_failed_disks[fd1] = 1;
if (!mio_writable_ldisk(mio, sc, fd1)) l_failed_disks[fd1] |= 0x10;
if (fd2>=0) {
l_failed_disks[fd2] = 1;
if (!mio_writable_ldisk(mio, sc, fd2)) l_failed_disks[fd2] |= 0x10;
}
}
if (unlikely(num_faults >2)) return -1;
memset(miou->rxw_column, 0, mio->N*sizeof(uint8_t));
memset(miou->rxw_matrix, 0, mio->num_pages*sizeof(uint8_t));
memset(miou->recov_op, 0, mio->m*sizeof(uint8_t));
for (pbg=0; pbg<mio->m; pbg++) {
/* Count the dirty (n_d) and empty (n_e) data pages of this row. */
n_d = n_e = 0;
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
st = __sc_get_block_status(sc, pos);
if (st==B_EMPTY) n_e++;
else if (st==B_DIRTY) n_d++;
}
if (n_d == 0) continue; /* there is no dirty pages */
/* Dispatch on the fault pattern; every branch jumps to one of the labelled
 * plans below, each of which ends with `continue`. */
if (num_faults==0)
CHOICE(2)
else { /* degraded mode */
if (num_faults == 1) {
if (fd1 == l_Q_disk) {
if (l_failed_disks[fd1] & 0x10) goto gen_PQ;
goto choice_without_Q;
}
else if (fd1 == l_P_disk) {
if (l_failed_disks[fd1] & 0x10) goto gen_PQ;
CHOICE(1)
} else { // single data disk failed
st = __sc_get_block_status(sc, fd1*mio->m + pbg);
if (st == B_EMPTY) {
#ifdef SUPPORT_R6_READ_MODIFY_WRITE
if (n_e > 2) // TODO: compare with "if (n_e >= 2 )"
goto read_modify_write;
#endif
goto recover_empty_using_P_and_gen_PQ;
}
goto gen_PQ;
}
}
else if (num_faults == 2) {
/* fd1 < fd2 here, so fd2 is Q, P or the higher-numbered data disk. */
if (fd2 == l_Q_disk) {
if (fd1 == l_P_disk) { // PQ failed
/* No syndrome to maintain: just write the dirty data pages. */
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
st = __sc_get_block_status(sc, pos);
if (st==B_DIRTY) {
data_rxw_matrix[pos] = MIO_WRITE;
miou->rxw_column[disk] |= MIO_WRITE;
}
}
continue;
} else { // DQ failed
st = __sc_get_block_status(sc, mio->m * fd1 + pbg);
if (st == B_EMPTY) goto read_modify_write_without_Q;
goto reconstruct_write_without_Q;
}
}
else if (fd2 == l_P_disk) { // DP failed
st = __sc_get_block_status(sc, mio->m * fd1 + pbg);
if (st != B_EMPTY) goto gen_PQ;
goto recover_DP;
}
else { // DD failed
int st2 = __sc_get_block_status(sc, mio->m * fd2 + pbg);
st = __sc_get_block_status(sc, mio->m * fd1 + pbg);
if (st != B_EMPTY && st2 != B_EMPTY) goto gen_PQ;
goto recover_DD;
}
}
}
continue;
#ifdef SUPPORT_R6_READ_MODIFY_WRITE
read_modify_write:/*read-modify-write: new parity = old data ^ old parity ^ new data */
// no disk failure
// no gain if N < 7
/* NOTE(review): the "no disk failure" remark above no longer matches the
 * callers: this label is also reached in degraded mode, via CHOICE(1) with P
 * failed and via the single failed data disk branch. */
miou->recov_op[pbg] = MIO_RECOV_RMW_PQ;
if (l_failed_disks[l_P_disk]) P_rxw_matrix[pbg] = MIO_TEMP_PAGE;
else P_rxw_matrix[pbg] = MIO_READ | MIO_WRITE;
Q_rxw_matrix[pbg] = MIO_RXW | MIO_XOR_DEST;
/* Only dirty pages take part: old contents are read into the temp page, new contents written. */
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
if (__sc_get_block_status(sc, pos)==B_DIRTY) {
data_rxw_matrix[pos] = MIO_TREAD | MIO_WRITE;
miou->rxw_column[disk] |= data_rxw_matrix[pos];
}
}
miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
miou->rxw_column[l_Q_disk] |= Q_rxw_matrix[pbg];
continue;
#endif
choice_without_Q:
/* NOTE(review): CHOICE() selects read-modify-write when n_d+diff < n_e,
 * whereas this test selects reconstruct-write under the analogous condition;
 * confirm the direction is intended. */
if (n_d+1 < n_e) goto reconstruct_write_without_Q;
else goto read_modify_write_without_Q;
reconstruct_write_without_Q:
// where, the block status of the failed disk is not empty
/* P is rebuilt by XOR over every data page of the row; Q is left untouched. */
miou->recov_op[pbg] = MIO_RECOV_XOR;
P_rxw_matrix[pbg] = MIO_XOR_DEST | MIO_WRITE;
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
if (l_failed_disks[disk] & 0x10)
data_rxw_matrix[pos] = MIO_XOR;
else {
st = __sc_get_block_status(sc, pos);
if (st==B_EMPTY)
data_rxw_matrix[pos] = MIO_READ | MIO_XOR;
else if (st==B_DIRTY)
data_rxw_matrix[pos] = MIO_WRITE | MIO_XOR;
else
data_rxw_matrix[pos] = MIO_XOR;
}
miou->rxw_column[disk] |= data_rxw_matrix[pos];
}
miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
continue;
read_modify_write_without_Q:
/* P is read, updated from the dirty pages and written back; Q is left untouched. */
miou->recov_op[pbg] = MIO_RECOV_XOR;
P_rxw_matrix[pbg] = MIO_RXW | MIO_XOR_DEST;
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
if (__sc_get_block_status(sc, pos)==B_DIRTY) {
data_rxw_matrix[pos] = MIO_TXXW;
miou->rxw_column[disk] |= MIO_TXXW;
}
}
miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
continue;
recover_empty_using_P_and_gen_PQ:
// single disk failure, the failed disk is empty, recover the failed disk from P and generate P and Q
miou->recov_op[pbg] = MIO_RECOV_XOR | MIO_GEN_PQ;
P_rxw_matrix[pbg] = MIO_READ | MIO_XOR | MIO_WRITE;
Q_rxw_matrix[pbg] = MIO_WRITE; //Q is never failed here.
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
if (l_failed_disks[disk])
data_rxw_matrix[pos] = MIO_XOR_DEST;
else {
st = __sc_get_block_status(sc, pos);
if (st==B_EMPTY)
data_rxw_matrix[pos] = MIO_READ | MIO_XOR;
else if (st==B_DIRTY)
data_rxw_matrix[pos] = MIO_TREAD | MIO_XOR | MIO_WRITE;
else
data_rxw_matrix[pos] = MIO_XOR;
}
miou->rxw_column[disk] |= data_rxw_matrix[pos];
}
miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
miou->rxw_column[l_Q_disk] |= Q_rxw_matrix[pbg];
continue;
gen_PQ:
// the failed disk is not empty. generate P and Q, where P may be failed.
miou->recov_op[pbg] = MIO_GEN_PQ;
/* A P disk flagged 0x10 gets a temp page instead of a write. */
if (l_failed_disks[l_P_disk] & 0x10) P_rxw_matrix[pbg] = MIO_TEMP_PAGE;
else P_rxw_matrix[pbg] = MIO_WRITE;
Q_rxw_matrix[pbg] = MIO_WRITE; //Q is never failed here.
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
if (l_failed_disks[disk] & 0x10)
data_rxw_matrix[pos] = MIO_OPERAND;
else {
st = __sc_get_block_status(sc, pos);
if (st==B_EMPTY)
data_rxw_matrix[pos] = MIO_READ;
else if (st==B_DIRTY)
data_rxw_matrix[pos] = MIO_WRITE;
else
data_rxw_matrix[pos] = MIO_OPERAND;
}
miou->rxw_column[disk] |= data_rxw_matrix[pos];
}
miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
miou->rxw_column[l_Q_disk] |= Q_rxw_matrix[pbg];
continue;
recover_DP:
// P and a data disk are failed, the block status of the failed data disk is B_EMPTY.
miou->recov_op[pbg] = MIO_RECOV_DP | MIO_GEN_PQ;
if (l_failed_disks[l_P_disk] & 0x10) P_rxw_matrix[pbg] = MIO_TEMP_PAGE;
else P_rxw_matrix[pbg] = MIO_WRITE;
Q_rxw_matrix[pbg] = MIO_READ | MIO_WRITE;
goto DDDP;
recover_DD:
// two data disks are failed.
miou->recov_op[pbg] = MIO_RECOV_DD | MIO_GEN_PQ;
P_rxw_matrix[pbg] = MIO_READ | MIO_WRITE;
Q_rxw_matrix[pbg] = MIO_READ | MIO_WRITE;
DDDP:
/* Data-page plan shared by recover_DP and recover_DD. */
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
st = __sc_get_block_status(sc, pos);
if (l_failed_disks[disk]) {
/* Failed disk: written only when not flagged 0x10 and dirty, otherwise a temp page. */
if (!(l_failed_disks[disk] & 0x10) && st == B_DIRTY)
data_rxw_matrix[pos] = MIO_WRITE;
else
data_rxw_matrix[pos] = MIO_TEMP_PAGE;
}
else {
if (st==B_EMPTY)
data_rxw_matrix[pos] = MIO_READ;
else if (st==B_DIRTY)
data_rxw_matrix[pos] = MIO_TREAD | MIO_WRITE;
else
data_rxw_matrix[pos] = MIO_OPERAND;
}
miou->rxw_column[disk] |= data_rxw_matrix[pos];
}
miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
miou->rxw_column[l_Q_disk] |= Q_rxw_matrix[pbg];
continue;
}
return 0;
}
/* read all empty pages */
/*<some examples>
*one failed data disk
* recov_op = MIO_RECOV_XOR
* disk: D0 D1 D2 D3 D4 P Q
* cache status: E, E/F, E, C, D, ,
* rxw: rx, _, rx, x, tx, rx,
*
* recov_op = MIO_RECOV_DD
* disk: D0 D1 D2 D3 D4 P Q
* cache status: D/F, E/F, E, C, D, ,
* rxw: , , r, , t, r, r
*
* recov_op = MIO_RECOV_DP_NP
* disk: D0 D1 D2 D3 D4 P Q
* cache status: D, E, E/F, C, D, F,
* rxw: t, r, , , t, , r
*
*
* recov_op = MIO_RECOV_XOR
* disk: D0 D1 D2 D3 D4 P Q
* cache status: C, E/F, E, C, D, ,
* rxw: tx, _, rx, x, tx, rx,
*
*two failed data disks
* recov_op = MIO_RECOV_DD
* disk: D0 D1 D2 D3 D4 P Q
* cache status: E/F, E/F, E, C, D, F,
* rxw: , , r, , t, r, r
*
*/
/*
 * Build the operation plan for reading every empty page of miou->sc.
 *
 * Fills miou->rxw_matrix (per page), miou->rxw_column (per logical disk) and
 * miou->recov_op (per row "pbg"), using the same layout as the write plan:
 * data page of logical disk d in row pbg at d*m + pbg, P at
 * num_data_pages + pbg and Q a further m entries on.
 *
 * Rows that need no reconstruction simply read their empty pages (read_pbg).
 * Otherwise the plan recovers pages through P (read_recov_with_P), through Q
 * (MIO_RECOV_DP_NP) or through both (MIO_RECOV_DD).
 *
 * Returns 0 on success, -1 when more than two faults are reported.
 */
STATIC int mio_r6_rxw_matrix_for_full_read(struct mio_unit *miou)
{
struct sc *sc = miou->sc;
struct mio *mio = miou->mio;
int pbg,disk,pos;
uint8_t *data_rxw_matrix = miou->rxw_matrix;
uint8_t *P_rxw_matrix = miou->rxw_matrix + mio->num_data_pages;
uint8_t *Q_rxw_matrix = P_rxw_matrix + mio->m;
int st;
int l_P_disk = mio->D;
int l_Q_disk = mio->D+1;
int fd1, fd2; //failed disk
int num_faults;
num_faults=mio_r6_l_failed_disks(mio, sc->sc_pos, &fd1, &fd2);
if (unlikely(num_faults >2)) return -1;
memset(miou->rxw_column, 0, mio->N*sizeof(uint8_t));
memset(miou->rxw_matrix, 0, mio->num_pages*sizeof(uint8_t));
memset(miou->recov_op, 0, mio->m*sizeof(uint8_t));
for (pbg=0; pbg<mio->m; pbg++) {
if (num_faults==0)
goto read_pbg;
if (num_faults==1) {
/* A lone failed P or Q does not affect data reads. */
if (fd1>=l_P_disk) {
goto read_pbg;
}
/* Failed data disk: reconstruct only when its page in this row is empty. */
st = __sc_get_block_status(sc, mio->m*fd1+pbg);
if (st != B_EMPTY) {
goto read_pbg;
}
else {
goto read_recov_with_P;
}
}
if (num_faults==2) {
int fd1_pos, fd2_pos;
fd1_pos = mio->m*fd1+pbg;
fd2_pos = mio->m*fd2+pbg;
if (fd1 == l_P_disk && fd2 == l_Q_disk)
goto read_pbg;
if (fd2 == l_Q_disk) // one failed data disk and Q loss
goto read_recov_with_P;
if (fd2 == l_P_disk) { // rebuild one data from Q, (one failed data disk and P loss)
if (__sc_get_block_status(sc, fd1_pos)!=B_EMPTY)
goto read_pbg;
miou->recov_op[pbg] = MIO_RECOV_DP_NP;
P_rxw_matrix[pbg] = MIO_TEMP_PAGE;
Q_rxw_matrix[pbg] = MIO_READ;
}
else { // two data disk failure
int st2;
st = __sc_get_block_status(sc, fd1_pos);
st2 = __sc_get_block_status(sc, fd2_pos);
if (st != B_EMPTY && st2 != B_EMPTY) goto read_pbg;
miou->recov_op[pbg] = MIO_RECOV_DD;
P_rxw_matrix[pbg] = MIO_READ;
Q_rxw_matrix[pbg] = MIO_READ;
}
/* Data-page plan shared by the DP_NP and DD cases above. */
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
st = __sc_get_block_status(sc, pos);
if (fd1 != disk && fd2 != disk) {
if (st == B_EMPTY)
data_rxw_matrix[pos] = MIO_READ;
else if (st == B_DIRTY)
data_rxw_matrix[pos] = MIO_TREAD;
else
data_rxw_matrix[pos] = MIO_OPERAND;
}
else {
/* Failed disk: destination of the recovery; a dirty page additionally gets a temp page. */
if (st == B_DIRTY)
data_rxw_matrix[pos] = MIO_TEMP_PAGE | MIO_XOR_DEST;
else
data_rxw_matrix[pos] = MIO_XOR_DEST;
}
miou->rxw_column[disk] |= data_rxw_matrix[pos];
}
miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
miou->rxw_column[l_Q_disk] |= Q_rxw_matrix[pbg];
continue;
}
continue;
read_pbg:
/* Plain read of every empty data page in the row; no recovery operation. */
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
if ( __sc_get_block_status(sc, pos) == B_EMPTY ) {
data_rxw_matrix[pos] = MIO_READ;
miou->rxw_column[disk] |= MIO_READ;
}
}
continue;
read_recov_with_P:
// here, the parity is not failed and the fault page is empty
/* NOTE(review): the "one failed data disk and Q loss" branch jumps here
 * without checking that the failed data page is empty. */
miou->recov_op[pbg] = MIO_RECOV_XOR;
P_rxw_matrix[pbg] = MIO_READ | MIO_XOR;
miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
if (fd1!=disk && fd2!=disk) {
st = __sc_get_block_status(sc, pos);
if (st == B_EMPTY)
data_rxw_matrix[pos] = MIO_READ | MIO_XOR;
else if (st == B_DIRTY)
data_rxw_matrix[pos] = MIO_TREAD | MIO_XOR;
else
data_rxw_matrix[pos] = MIO_XOR;
}
else
data_rxw_matrix[pos] = MIO_XOR_DEST;
miou->rxw_column[disk] |= data_rxw_matrix[pos];
}
continue;
}
return 0;
}
/*
 * Build the plan for a plain read of block positions start .. start+len-1.
 *
 * NOTICE: the whole range must lie inside a single strip; this is not
 * checked.  The column flag is recorded on the logical disk that owns @start.
 *
 * Every empty page in the range is marked MIO_READ.  Always returns 0.
 */
STATIC int mio_r6_rxw_matrix_for_read(struct mio_unit *miou, unsigned start, unsigned len)
{
	struct mio *mio = miou->mio;
	struct sc *sc = miou->sc;
	uint8_t *matrix = miou->rxw_matrix;
	int stop = start+len;
	int owner;
	int cur;

	memset(miou->rxw_column, 0, mio->N*sizeof(uint8_t));
	memset(miou->rxw_matrix, 0, mio->num_pages*sizeof(uint8_t));
	memset(miou->recov_op, 0, mio->m*sizeof(uint8_t));

	owner = mio_r6_logical_data_disk(mio, start);
	for (cur = start; cur < stop; cur++) {
		if (__sc_get_block_status(sc, cur) != B_EMPTY)
			continue;
		matrix[cur] = MIO_READ;
		miou->rxw_column[owner] |= MIO_READ;
	}
	return 0;
}
/*
 * Build the operation plan for rebuilding the failed disk(s) of miou->sc.
 *
 * Fills miou->rxw_matrix (per page), miou->rxw_column (per logical disk) and
 * miou->recov_op (per row "pbg").  The data page of logical disk d in row pbg
 * is at index d*m + pbg, P at num_data_pages + pbg and Q a further m entries
 * on.
 *
 * Returns 0 on success, -1 when no disk is failed or a failed disk is not
 * writable.
 *
 * Changes against the previous version:
 *  - the "single clean failed data page, nothing dirty" path read the column
 *    flag from a stale index (left at the P entry by the counting loop, which
 *    is still zero at that point), so the rebuilt disk's column never got
 *    MIO_WRITE; it now uses the page's own index;
 *  - gen_P accumulates the P column with |= like every other path.
 */
STATIC int mio_r6_rxw_matrix_for_rebuild(struct mio_unit *miou)
{
	struct sc *sc = miou->sc;
	struct mio *mio = miou->mio;
	int pbg,disk,pos;
	uint8_t st;
	uint8_t *data_rxw_matrix = miou->rxw_matrix;
	uint8_t *P_rxw_matrix = miou->rxw_matrix + mio->num_data_pages;
	uint8_t *Q_rxw_matrix = P_rxw_matrix + mio->m;
	int l_P_disk = mio->D;
	int l_Q_disk = mio->D+1;
	uint8_t l_failed_disks[LORE_MAX_DISKS];	/* 1 = logical disk is failed */
	int fd1, fd2; // failed disks (logical numbers)
	int num_faults;
	int n_d;	/* dirty data pages in the current row */
	int bD;		/* set when the current row holds a dirty page */

	memset(&l_failed_disks, 0, sizeof(l_failed_disks));
	if ((num_faults=mio_r6_l_failed_disks(mio, sc->sc_pos, &fd1, &fd2))>0) {
		l_failed_disks[fd1] = 1;
		if (fd2>=0) l_failed_disks[fd2] = 1;
	}
	else return -1;

	/* NOTE(review): fd1/fd2 are logical disk numbers; confirm that
	 * sc->lore->disks[] is meant to be indexed that way. */
	if (!lore_writable_disk(&sc->lore->disks[fd1])) return -1;
	if (fd2>=0 && !lore_writable_disk(&sc->lore->disks[fd2])) return -1;

	memset(miou->rxw_column, 0, mio->N*sizeof(uint8_t));
	memset(miou->rxw_matrix, 0, mio->num_pages*sizeof(uint8_t));
	memset(miou->recov_op, 0, mio->m*sizeof(uint8_t));

	// TODO: recover bad block while rebuilding
	for (pbg=0; pbg<mio->m; pbg++) {
		n_d = 0;
		for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
			st = __sc_get_block_status(sc, pos);
			if (st==B_DIRTY) n_d++;
		}

		/* Dispatch on the fault pattern; each labelled plan ends with `continue`. */
		if (num_faults == 1) {
			if (fd1 == l_Q_disk)
				goto gen_PQ;
			else if (fd1 == l_P_disk) {
				if (n_d==0) goto gen_P; //generate only Parity
				goto gen_PQ;
			}
			else { // single data disk failed
				st = __sc_get_block_status(sc, fd1*mio->m + pbg);
				if (st == B_EMPTY) goto recover_empty_using_P_and_gen_PQ;
				if (n_d==0) { // there is no dirty and st == B_CLEAN
					/* The cached page is valid: write it to the rebuilt disk. */
					pos = fd1*mio->m + pbg;
					data_rxw_matrix[pos] = MIO_WRITE;
					miou->rxw_column[fd1] |= data_rxw_matrix[pos];
					continue;
				} else goto gen_PQ;
			}
		}
		else if (num_faults == 2) {
			/* fd1 < fd2 here, so fd2 is Q, P or the higher-numbered data disk. */
			if (fd2 == l_Q_disk) {
				if (fd1 == l_P_disk) // PQ failed
					goto gen_PQ;
				else { // DQ failed
					st = __sc_get_block_status(sc, mio->m * fd1 + pbg);
					if (st == B_EMPTY) goto recover_empty_using_P_and_gen_PQ;
					else goto gen_PQ;
				}
			}
			else if (fd2 == l_P_disk) { // DP failed
				st = __sc_get_block_status(sc, mio->m * fd1 + pbg);
				if (st != B_EMPTY) goto gen_PQ; // TODO: for more optimization, if there is no B_DIRTY, generate only parity
				goto recover_DP;
			}
			else { // DD failed
				int st2 = __sc_get_block_status(sc, mio->m * fd2 + pbg);
				st = __sc_get_block_status(sc, mio->m * fd1 + pbg);
				if (st != B_EMPTY && st2 != B_EMPTY) goto gen_PQ;
				goto recover_DD;
			}
		}
		continue;

gen_P:
		/* Only P is failed and nothing is dirty: regenerate P by XOR. */
		miou->recov_op[pbg] = MIO_RECOV_XOR;
		for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
			st = __sc_get_block_status(sc, pos);
			if (st==B_EMPTY)
				data_rxw_matrix[pos] = MIO_READ | MIO_XOR;
			else if (st==B_DIRTY)
				data_rxw_matrix[pos] = MIO_XOR | MIO_WRITE;
			else
				data_rxw_matrix[pos] = MIO_XOR;
			miou->rxw_column[disk] |= data_rxw_matrix[pos];
		}
		P_rxw_matrix[pbg] = MIO_XOR_DEST | MIO_WRITE;
		miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
		continue;

gen_PQ:
		// the failed disk is not B_EMPTY. generate P and Q
		miou->recov_op[pbg] = MIO_GEN_PQ;
		bD=0;
		for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
			st = __sc_get_block_status(sc, pos);
			if (st==B_DIRTY) bD = 1;
			if (l_failed_disks[disk]==0) {
				if (st==B_EMPTY)
					data_rxw_matrix[pos] = MIO_READ;
				else if (st==B_DIRTY)
					data_rxw_matrix[pos] = MIO_WRITE;
				else
					data_rxw_matrix[pos] = MIO_OPERAND;
			} else
				data_rxw_matrix[pos] = MIO_WRITE; // B_CLEAN or B_DIRTY
			miou->rxw_column[disk] |= data_rxw_matrix[pos];
		}
		/* P is written when data changed or P itself is being rebuilt;
		 * otherwise it only needs a temp page. */
		if (bD || fd1 == l_P_disk || fd2 == l_P_disk)
			P_rxw_matrix[pbg] = MIO_WRITE;
		else
			P_rxw_matrix[pbg] = MIO_TEMP_PAGE;
		Q_rxw_matrix[pbg] = MIO_WRITE;
		miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
		miou->rxw_column[l_Q_disk] |= Q_rxw_matrix[pbg];
		continue;

recover_DP:
		// P and a data disk are failed, the block status of the failed data disk is B_EMPTY.
		bD = 0;
		for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
			st = __sc_get_block_status(sc, pos);
			if (st==B_DIRTY) bD = 1;
			if (l_failed_disks[disk])
				data_rxw_matrix[pos] = MIO_WRITE;
			else {
				if (st==B_EMPTY)
					data_rxw_matrix[pos] = MIO_READ;
				else if (st==B_DIRTY)
					data_rxw_matrix[pos] = MIO_TREAD | MIO_WRITE;
				else
					data_rxw_matrix[pos] = MIO_OPERAND;
			}
			miou->rxw_column[disk] |= data_rxw_matrix[pos];
		}
		if (bD) {
			/* Dirty data present: Q must be regenerated and rewritten as well. */
			miou->recov_op[pbg] = MIO_RECOV_DP | MIO_GEN_PQ;
			P_rxw_matrix[pbg] = MIO_WRITE;
			Q_rxw_matrix[pbg] = MIO_READ | MIO_WRITE;
		} else {
			miou->recov_op[pbg] = MIO_RECOV_DP;
			P_rxw_matrix[pbg] = MIO_WRITE;
			Q_rxw_matrix[pbg] = MIO_READ;
		}
		miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
		miou->rxw_column[l_Q_disk] |= Q_rxw_matrix[pbg];
		continue;

recover_DD:
		// two data disks are failed.
		bD = 0;
		for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
			st = __sc_get_block_status(sc, pos);
			if (st==B_DIRTY) bD = 1;
			if (l_failed_disks[disk])
				data_rxw_matrix[pos] = MIO_WRITE;
			else {
				if (st==B_EMPTY)
					data_rxw_matrix[pos] = MIO_READ;
				else if (st==B_DIRTY)
					data_rxw_matrix[pos] = MIO_TREAD | MIO_WRITE;
				else
					data_rxw_matrix[pos] = MIO_OPERAND;
			}
			miou->rxw_column[disk] |= data_rxw_matrix[pos];
		}
		if (bD) {
			/* Dirty data present: P and Q are read for recovery and rewritten. */
			miou->recov_op[pbg] = MIO_RECOV_DD | MIO_GEN_PQ;
			P_rxw_matrix[pbg] = MIO_READ | MIO_WRITE;
			Q_rxw_matrix[pbg] = MIO_READ | MIO_WRITE;
		} else {
			miou->recov_op[pbg] = MIO_RECOV_DD;
			P_rxw_matrix[pbg] = MIO_READ;
			Q_rxw_matrix[pbg] = MIO_READ;
		}
		miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
		miou->rxw_column[l_Q_disk] |= Q_rxw_matrix[pbg];
		continue;

recover_empty_using_P_and_gen_PQ:
		// single disk failure, the failed disk is empty, recover the failed disk from P and generate P and Q
		bD = 0;
		for (disk=0, pos=pbg; disk<mio->D; disk++, pos+=mio->m) {
			st = __sc_get_block_status(sc, pos);
			if (st==B_DIRTY) bD = 1;
			if (l_failed_disks[disk])
				data_rxw_matrix[pos] = MIO_XOR_DEST | MIO_WRITE;
			else {
				if (st==B_EMPTY)
					data_rxw_matrix[pos] = MIO_READ | MIO_XOR;
				else if (st==B_DIRTY)
					data_rxw_matrix[pos] = MIO_TREAD | MIO_XOR | MIO_WRITE;
				else
					data_rxw_matrix[pos] = MIO_XOR;
			}
			miou->rxw_column[disk] |= data_rxw_matrix[pos];
		}
		if (bD) {
			P_rxw_matrix[pbg] = MIO_READ | MIO_XOR | MIO_WRITE;
			Q_rxw_matrix[pbg] = MIO_WRITE;
			miou->recov_op[pbg] = MIO_RECOV_XOR | MIO_GEN_PQ;
		}
		else {
			P_rxw_matrix[pbg] = MIO_READ | MIO_XOR;
			if (fd1==l_Q_disk || fd2==l_Q_disk) {
				Q_rxw_matrix[pbg] = MIO_WRITE;
				miou->recov_op[pbg] = MIO_RECOV_XOR | MIO_GEN_PQ;
			}
			else
				miou->recov_op[pbg] = MIO_RECOV_XOR; //if Q is not failed and no dirty blocks, no gen_PQ
		}
		miou->rxw_column[l_P_disk] |= P_rxw_matrix[pbg];
		miou->rxw_column[l_Q_disk] |= Q_rxw_matrix[pbg];
		continue;
	}
	return 0;
}
/*
 * Build the plan used to initialise a stripe: every data page is read and
 * both P and Q are read and written, with MIO_RECOV_INIT as the row
 * operation.
 *
 * Returns 0 on success, -1 when any fault is reported for the stripe.
 */
STATIC int mio_r6_rxw_matrix_for_initialization(struct mio_unit *miou)
{
	struct mio *mio = miou->mio;
	struct sc *sc = miou->sc;
	uint8_t *matrix = miou->rxw_matrix;
	uint8_t *p_row = miou->rxw_matrix + mio->num_data_pages;
	uint8_t *q_row = p_row + mio->m;
	const int p_col = mio->D;
	const int q_col = mio->D+1;
	int failed_a, failed_b;
	int row;

	if (mio_r6_l_failed_disks(mio, sc->sc_pos, &failed_a, &failed_b) > 0)
		return -1;

	memset(matrix, 0, mio->num_pages*sizeof(uint8_t));
	memset(miou->rxw_column, 0, mio->N*sizeof(uint8_t));
	memset(miou->recov_op, 0, mio->m*sizeof(uint8_t));

	for (row = 0; row < mio->m; row++) {
		int col;
		int idx = row;

		miou->recov_op[row] = MIO_RECOV_INIT;

		for (col = 0; col < mio->D; col++) {
			matrix[idx] = MIO_READ;
			miou->rxw_column[col] |= matrix[idx];
			idx += mio->m;
		}

		p_row[row] = MIO_READ | MIO_WRITE;
		q_row[row] = MIO_READ | MIO_WRITE;
		miou->rxw_column[p_col] = p_row[row];
		miou->rxw_column[q_col] = q_row[row];
	}
	return 0;
}
/*
 * Build the plan for scrubbing a stripe: P and Q are read, and every data
 * page takes part in the XOR (read from disk when empty, via the temp page
 * when dirty, taken from the cache when clean).  The row operation is
 * MIO_RECOV_SCRUB.
 *
 * Returns 0 on success, -1 when any fault is reported for the stripe.
 */
STATIC int mio_r6_rxw_matrix_for_scrub(struct mio_unit *miou)
{
	struct mio *mio = miou->mio;
	struct sc *sc = miou->sc;
	uint8_t *data_row = miou->rxw_matrix;
	uint8_t *p_row = miou->rxw_matrix + mio->num_data_pages;
	uint8_t *q_row = p_row + mio->m;
	const int p_col = mio->D;
	const int q_col = mio->D+1;
	int failed_a, failed_b;
	int faults;
	int row;

	faults = mio_r6_l_failed_disks(mio, sc->sc_pos, &failed_a, &failed_b);
	if (unlikely(faults > 0))
		return -1;

	memset(miou->rxw_column, 0, mio->N*sizeof(uint8_t));
	memset(miou->rxw_matrix, 0, mio->num_pages*sizeof(uint8_t));
	memset(miou->recov_op, 0, mio->m*sizeof(uint8_t));

	for (row = 0; row < mio->m; row++) {
		int col;
		int idx = row;

		p_row[row] = MIO_READ;
		q_row[row] = MIO_READ;
		miou->rxw_column[p_col] = MIO_READ;
		miou->rxw_column[q_col] = MIO_READ;
		miou->recov_op[row] = MIO_RECOV_SCRUB;

		for (col = 0; col < mio->D; col++) {
			int status = __sc_get_block_status(sc, idx);

			if (status == B_EMPTY)
				data_row[idx] = MIO_READ | MIO_XOR;
			else if (status == B_DIRTY)
				data_row[idx] = MIO_TREAD | MIO_XOR;
			else	/* B_CLEAN */
				data_row[idx] = MIO_XOR;

			miou->rxw_column[col] |= data_row[idx];
			idx += mio->m;
		}
	}
	return 0;
}
/*
 * Refresh the cached failed-disk information and derive the array status:
 * three or more faults -> LORE_DOWN, one or two -> LORE_DEGRADED,
 * none -> LORE_OK.  A negative fault count leaves the status unchanged.
 *
 * Does nothing when @mio, its lore or its RAID-6 private state is missing;
 * raid_priv is NULL after mio_r6_cleanup() or a failed mio_r6_init().
 */
STATIC void mio_r6_check_raid_status(struct mio *mio)
{
	struct lore *lore;
	struct mio_r6_priv *priv;
	/* Pre-set to "no disk" in case the helper fills fewer than two slots
	 * (its exact contract is not visible from this file). */
	int failed_disks[2] = { -1, -1 };

	if (mio == NULL || mio->lore == NULL)
		return;
	priv = (struct mio_r6_priv *)mio->raid_priv;
	if (priv == NULL)
		return;
	lore = mio->lore;

	priv->num_faults =
		lore_get_failed_disks(mio->lore, failed_disks, 2);
	priv->phys_failed_disk1 = failed_disks[0];
	priv->phys_failed_disk2 = failed_disks[1];

	if (priv->num_faults >= 3)
		lore->raid_status = LORE_DOWN;
	else if (priv->num_faults >= 1)
		lore->raid_status = LORE_DEGRADED;
	else if (priv->num_faults == 0)
		lore->raid_status = LORE_OK;
}
/*
 * Recover data page @faila using Q, optionally rebuilding P as well.
 *
 * @ptrs holds N page pointers: data pages first, then P at N-2 and Q at N-1.
 * The array is modified: slot @faila is pointed at the zero page and slot
 * N-1 at the failed data page, so the syndrome run writes its P output into
 * the P page and its Q output into the failed data page.  Each byte of the
 * data page is then replaced by mul[byte ^ q] and, when @recover_P is set,
 * XORed into P.
 */
STATIC void __mio_r6_recov_DP(int N, int bytes, int faila, uint8_t **ptrs, int recover_P)
{
	uint8_t *data = ptrs[faila];
	uint8_t *parity = ptrs[N-2];
	uint8_t *q_in = ptrs[N-1];
	const uint8_t *mul;

	ptrs[N-1] = data;
	ptrs[faila] = raid6_zero_page;
	gen_syndrome(N, bytes, (void **)ptrs);

	mul = gfmul[gfinv[gfexp[faila]]];

	for ( ; bytes != 0; bytes--) {
		uint8_t fixed = mul[*data ^ *q_in];

		*data = fixed;
		if (recover_P) {
			*parity ^= fixed;
			parity++;
		}
		data++;
		q_in++;
	}
}
/*
 * Recover the two data pages @faila and @faisc from P and Q.
 *
 * @ptrs holds N page pointers: data pages first, then P at N-2 and Q at N-1.
 * The array is modified: both failed slots are pointed at the zero page and
 * the P/Q slots at the two failed data pages, so the syndrome run leaves its
 * outputs there.  r6_DD_engine() then combines them with the original P and
 * Q pages.
 */
STATIC void __mio_r6_recov_DD(int N, int bytes, int faila, int faisc, uint8_t **ptrs)
{
	const int p_slot = N-2;
	const int q_slot = N-1;
	uint8_t *parity = (uint8_t *)ptrs[p_slot];
	uint8_t *q_in = (uint8_t *)ptrs[q_slot];
	uint8_t *first = (uint8_t *)ptrs[faila];
	uint8_t *second = (uint8_t *)ptrs[faisc];

	ptrs[faila] = (void *)raid6_zero_page;
	ptrs[faisc] = (void *)raid6_zero_page;
	ptrs[p_slot] = first;
	ptrs[q_slot] = second;

	gen_syndrome(N, bytes, (void **)ptrs);
	r6_DD_engine(parity, q_in, first, second, faila, faisc, bytes);
}
/* Element of miou->mio_pages for the P page of row pbg (index D*m + pbg). */
#define P_PAGE(mio,miou,pbg) ((miou)->mio_pages + (mio)->D*(mio)->m + (pbg))
/* Element of miou->mio_pages for the Q page of row pbg (index (D+1)*m + pbg). */
#define Q_PAGE(mio,miou,pbg) ((miou)->mio_pages + ((mio)->D+1)*(mio)->m + (pbg))
/*
 * Fill @ptrs with the page pointers of row @pbg: one entry per data disk,
 * then P at index D and Q at index D+1.
 *
 * When @old is non-zero, data pages whose matrix entry carries MIO_TREAD or
 * MIO_TEMP_PAGE contribute their temp_page instead of their addr.
 */
STATIC void mio_r6_recov_ptrs(struct mio_unit *miou, int pbg, uint8_t **ptrs, int old)
{
	struct mio *mio = miou->mio;
	int col;
	int idx = pbg;

	for (col = 0; col < mio->D; col++) {
		struct mio_page *page = miou->mio_pages+idx;

		if (old && (miou->rxw_matrix[idx] & (MIO_TREAD | MIO_TEMP_PAGE)))
			ptrs[col] = page->temp_page;
		else
			ptrs[col] = page->addr;
		idx += mio->m;
	}

	ptrs[mio->D] = P_PAGE(mio, miou, pbg)->addr;
	ptrs[mio->D+1] = Q_PAGE(mio, miou, pbg)->addr;
}
/*
 * Recover the failed data page of row @pbg through Q (data + P failure).
 * Collects the row's "old" page pointers, looks up the failed logical disks
 * and hands over to __mio_r6_recov_DP(); @recov_P is passed through.
 */
STATIC void mio_r6_recov_DP(struct mio_unit *miou, int pbg, int recov_P)
{
	struct mio *mio = miou->mio;
	uint8_t **pages = (uint8_t **)miou->syndrome_ptrs;
	int failed_data, failed_other;

	mio_r6_recov_ptrs(miou, pbg, pages, 1);
	mio_r6_l_failed_disks(mio, miou->sc->sc_pos, &failed_data, &failed_other);
#ifdef LORE_DEBUG
	/* Expect exactly: one data disk plus P. */
	BUG_ON(failed_data<0 || failed_data>=mio->D || failed_other!=mio->D);
#endif
	__mio_r6_recov_DP(mio->N, mio->page_size, failed_data, pages, recov_P);
}
/*
 * Recover the two failed data pages of row @pbg.  Collects the row's "old"
 * page pointers, looks up the failed logical disks and hands over to
 * __mio_r6_recov_DD().
 */
STATIC void mio_r6_recov_DD(struct mio_unit *miou, int pbg)
{
	struct mio *mio = miou->mio;
	uint8_t **pages = (uint8_t **)miou->syndrome_ptrs;
	int first_failed, second_failed;

	mio_r6_recov_ptrs(miou, pbg, pages, 1);
	mio_r6_l_failed_disks(miou->mio, miou->sc->sc_pos, &first_failed, &second_failed);
#ifdef LORE_DEBUG
	/* Both failures must be data disks. */
	BUG_ON(first_failed<0 || second_failed<0 || first_failed>=mio->D || second_failed>=mio->D);
#endif
	__mio_r6_recov_DD(mio->N, mio->page_size, first_failed, second_failed, pages);
}
/*
 * Read-modify-write update of P and Q for row @pbg.
 *
 * For each data disk with a non-zero matrix entry the old contents
 * (temp_page) and new contents (addr) are passed on; all other data slots are
 * NULL in both arrays.  The last two slots (P, Q) point at the same page in
 * both arrays.
 */
STATIC void mio_r6_recov_RMW_PQ(struct mio_unit *miou, int pbg)
{
	struct mio *mio = miou->mio;
	uint8_t **old_pages = (uint8_t **)miou->syndrome_ptrs;
	uint8_t *new_pages[LORE_MAX_DISKS];
	struct mio_page *page;
	unsigned idx = pbg;
	int col;

	/* Data disks. */
	for (col = 0; col < mio->D; col++, idx += mio->m) {
		page = miou->mio_pages+idx;
		if (!miou->rxw_matrix[idx]) {
			old_pages[col] = NULL;
			new_pages[col] = NULL;
			continue;
		}
		old_pages[col] = page->temp_page;
		new_pages[col] = page->addr;
	}

	/* P and Q: idx has advanced past the data pages to the row's P page, then Q. */
	for ( ; col < mio->N; col++, idx += mio->m) {
		page = miou->mio_pages+idx;
		old_pages[col] = page->addr;
		new_pages[col] = page->addr;
	}

	r6_read_modify_write(mio->N, mio->page_size, (void **)old_pages, (void **)new_pages);
}
/*
 * Generate P and Q for row pbg from the current buffers: the pointer
 * list is built with old=0, so every data column uses its addr buffer.
 */
STATIC void mio_r6_gen_syndrome(struct mio_unit *miou, int pbg)
{
	struct mio *mio = miou->mio;
	uint8_t **bufs = (uint8_t **)miou->syndrome_ptrs;

	mio_r6_recov_ptrs(miou, pbg, bufs, 0);
	gen_syndrome(mio->N, mio->page_size, (void **)bufs);
}
/*
 * Like mio_r6_gen_syndrome, but the P and Q outputs are redirected to the
 * private scratch pages (p_scrub_page_p / p_scrub_page_q), so the stored
 * parity pages of row pbg are left untouched.
 */
STATIC void mio_r6_gen_syndrome_for_scrub(struct mio_unit *miou, int pbg)
{
	struct mio *mio = miou->mio;
	struct mio_r6_priv *priv = (struct mio_r6_priv *)mio->raid_priv;
	uint8_t **bufs = (uint8_t **)miou->syndrome_ptrs;

	mio_r6_recov_ptrs(miou, pbg, bufs, 0);

	/* Replace the parity slots (indices D and D+1) with the scratch pages. */
	bufs[mio->D] = priv->p_scrub_page_p;
	bufs[mio->D + 1] = priv->p_scrub_page_q;

	gen_syndrome(mio->N, mio->page_size, (void **)bufs);
}
/*
 * Scrub one unit.
 *
 * For every row, P and Q are recomputed into the private scratch pages
 * and compared with the stored parity pages. A page that differs is
 * logged, overwritten with the recomputed content, and flagged MIO_WRITE
 * both in rxw_column (per column) and in rxw_matrix (per page).
 *
 * scrub_mutex is held for the whole pass because the scratch pages live
 * in the shared mio_r6_priv.
 */
static void mio_r6_scrub(struct mio_unit *miou)
{
	struct mio *mio = miou->mio;
	struct mio_r6_priv *priv = (struct mio_r6_priv *)mio->raid_priv;
	/* rxw_matrix rows of the P and Q columns follow the data pages. */
	uint8_t *p_flags = miou->rxw_matrix + mio->num_data_pages;
	uint8_t *q_flags = miou->rxw_matrix + mio->num_data_pages + mio->m;
	uint8_t *stored_p, *stored_q;
	int row;

	down(&priv->scrub_mutex);

	for (row = 0; row < mio->m; row++) {
		mio_r6_gen_syndrome_for_scrub(miou, row);

		stored_p = P_PAGE(mio, miou, row)->addr;
		stored_q = Q_PAGE(mio, miou, row)->addr;

		if (memcmp(stored_p, priv->p_scrub_page_p, mio->page_size)) {
			printk(KERN_INFO "lore: found a P-inconsistent stripe %llu.%d\n",
			       miou->sc->sc_pos, row);
			memcpy(stored_p, priv->p_scrub_page_p, mio->page_size);
			miou->rxw_column[mio->D] = MIO_WRITE;
			p_flags[row] = MIO_WRITE;
		}

		if (memcmp(stored_q, priv->p_scrub_page_q, mio->page_size)) {
			printk(KERN_INFO "lore: found a Q-inconsistent stripe %llu.%d\n",
			       miou->sc->sc_pos, row);
			memcpy(stored_q, priv->p_scrub_page_q, mio->page_size);
			miou->rxw_column[mio->D + 1] = MIO_WRITE;
			q_flags[row] = MIO_WRITE;
		}
	}

	up(&priv->scrub_mutex);
}
/*
 * Run the parity/recovery computation requested for a unit.
 *
 * recov_op[0] selects the whole-unit modes:
 *   MIO_RECOV_INIT  - returns 1 without computing anything when
 *                     mio_is_zeros(miou) is true, otherwise generates
 *                     P/Q for every row and returns 0;
 *   MIO_RECOV_SCRUB - scrubs the unit and returns 0.
 * Otherwise each row is handled according to its own recov_op[] entry:
 * the recovery step (opcode with MIO_GEN_PQ masked off) runs first, then
 * P/Q is regenerated when MIO_GEN_PQ is set. Returns 0.
 */
STATIC int mio_r6_syndrome(struct mio_unit *miou)
{
	struct mio *mio = miou->mio;
	uint8_t unit_op = miou->recov_op[0];
	int row;

	if (unlikely(unit_op == MIO_RECOV_INIT)) {
		if (mio_is_zeros(miou))
			return 1;

		for (row = 0; row < mio->m; row++)
			mio_r6_gen_syndrome(miou, row);
		return 0;
	}

	if (unlikely(unit_op == MIO_RECOV_SCRUB)) {
		mio_r6_scrub(miou);
		return 0;
	}

	for (row = 0; row < mio->m; row++) {
		/* Recovery step: the sources are old or clean data. */
		switch (miou->recov_op[row] & (~MIO_GEN_PQ)) {
		case MIO_RECOV_XOR:
			mio_xor_pbg(miou, row);
			break;
		case MIO_RECOV_DP_NP:
			/* failed data disk and P loss, P is not recovered */
			mio_r6_recov_DP(miou, row, 0);
			break;
		case MIO_RECOV_DP:
			/* failed data disk and P loss */
			mio_r6_recov_DP(miou, row, 1);
			break;
		case MIO_RECOV_DD:
			/* two failed data disks */
			mio_r6_recov_DD(miou, row);
			break;
		case MIO_RECOV_RMW_PQ:
			mio_r6_recov_RMW_PQ(miou, row);
			break;
		default:
			break;
		}

		/* Parity step: the sources are the up-to-date data, B_DIRTY included. */
		if (miou->recov_op[row] & MIO_GEN_PQ)
			mio_r6_gen_syndrome(miou, row);
	}

	return 0;
}
/*
 * RAID-6 instance of struct mio_raid: a level identifier, a small integer
 * and the set of mio_r6_* callbacks implemented in this file.
 * The initializers are positional, so their order must match the member
 * order of struct mio_raid (declared outside this section of the file).
 */
struct mio_raid mio_raid6 = {
LORE_RAID6,
2, /* NOTE(review): presumably the number of parity disks (P and Q) -- confirm against struct mio_raid */
mio_r6_init,
mio_r6_cleanup,
mio_r6_logi2phys_disk,
mio_r6_physical_data_disk,
mio_r6_logical_data_disk_func,
mio_r6_sector_offset,
mio_r6_rxw_matrix_for_write,
mio_r6_rxw_matrix_for_read,
mio_r6_rxw_matrix_for_full_read,
mio_r6_rxw_matrix_for_rebuild,
mio_r6_rxw_matrix_for_initialization,
mio_r6_rxw_matrix_for_scrub,
mio_r6_check_raid_status,
mio_r6_syndrome,
mio_r6_full_stripe_read_condition
};
#ifdef LORE_DEBUG
/*
 * Printable names for recovery opcodes, indexed in dis_rxw by a recov_op
 * value with MIO_GEN_PQ masked off.
 * NOTE(review): MIO_RECOV_RMW_PQ, MIO_RECOV_INIT and MIO_RECOV_SCRUB are
 * used in this file but have no name here; confirm that the order and
 * length of this table match the opcode values.
 */
static char *szRecov[] = {
"RECOV_NOP", "RECOV_XOR", "RECOV_DP", "RECOV_DP_NP", "RECOV_DD"
};
/*
 * Debug helper: print the flags set in one rxw byte as a fixed-width
 * field so that consecutive entries line up in columns.
 *
 * Each set flag prints its symbol (r, t, x, xx, _, w, p); the width of
 * every absent symbol is added as trailing blanks. MIO_DOUBLE_XOR prints
 * two characters, so its absence must be padded with two blanks --
 * padding it with one made the field 7 or 8 characters wide depending on
 * the flag and misaligned the output.
 */
static void print_rxw(uint8_t rxw)
{
	int i, pad = 0;

	if (rxw & MIO_READ)
		printk("r");
	else
		pad++;

	if (rxw & MIO_TREAD)
		printk("t");
	else
		pad++;

	if (rxw & (MIO_XOR))
		printk("x");
	else
		pad++;

	if (rxw & MIO_DOUBLE_XOR)
		printk("xx");
	else
		pad += 2;	/* "xx" is two characters wide */

	if (rxw & MIO_XOR_DEST)
		printk("_");
	else
		pad++;

	if (rxw & (MIO_WRITE))
		printk("w");
	else
		pad++;

	if (rxw & (MIO_TEMP_PAGE))
		printk("p");
	else
		pad++;

	for (i = 0; i < pad; i++)
		printk(" ");
}
/*
 * Debug helper: print the cache status of row pbg of a stripe.
 *
 * One token is printed per data column: E, C or D for a block status of
 * B_EMPTY, B_CLEAN or B_DIRTY, followed by "/F" when the column is one of
 * the two indices reported by mio_r6_l_failed_disks. P and Q are printed
 * last, with "/F" when index D or D+1 respectively is reported as failed.
 */
static void disp_sc(struct mio *mio, struct sc *sc, int pbg)
{
	int col, idx = pbg;
	int fail1, fail2;

	mio_r6_l_failed_disks(mio, sc->sc_pos, &fail1, &fail2);
	printk(" cache status (stripe=%llu, pbg=%d)------------ \n", sc->sc_pos, pbg);

	for (col = 0; col < mio->D; col++) {
		uint8_t status = __sc_get_block_status(sc, idx);

		if (status == B_EMPTY)
			printk(" E");
		if (status == B_CLEAN)
			printk(" C");
		if (status == B_DIRTY)
			printk(" D");
		if (fail1 == col || fail2 == col)
			printk("/F");
		printk(" ");

		idx += mio->m;	/* same row, next column */
	}

	if (fail1 != mio->D && fail2 != mio->D)
		printk(" P");
	else
		printk(" P/F");

	if (fail1 != mio->D + 1 && fail2 != mio->D + 1)
		printk(" Q");
	else
		printk(" Q/F");

	printk("\n\n");
}
/*
 * Debug helper: dump the recovery opcode, the rxw_matrix entries and the
 * rxw_column entries for row pbg of a stripe, followed by the cache
 * status printed by disp_sc.
 *
 * The opcode (recov_op[pbg] with MIO_GEN_PQ masked off) is translated via
 * szRecov[]. That table has fewer entries than there are opcodes used in
 * this file, so the index is range-checked and an out-of-range opcode is
 * printed as "RECOV_UNKNOWN" instead of reading past the end of the table.
 */
static void dis_rxw(struct mio *mio, struct mio_unit *miou, uint8_t *rxw_matrix, int stripe, int pbg)
{
	int j, pos;
	unsigned int op = miou->recov_op[pbg] & (~MIO_GEN_PQ);
	const char *op_name;

	if (op < sizeof(szRecov) / sizeof(szRecov[0]))
		op_name = szRecov[op];
	else
		op_name = "RECOV_UNKNOWN";

	printk(" rxw-matrix (stripe=%d pbg=%d) ------------ \n", stripe, pbg);
	if (miou->recov_op[pbg] & MIO_GEN_PQ)
		printk(" -- %s | MIO_GEN_PQ\n", op_name);
	else
		printk(" -- %s\n", op_name);

	/* Row pbg of the matrix: one entry per column, m entries apart. */
	for (j = 0, pos = pbg; j < mio->N; j++, pos += mio->m) {
		print_rxw(rxw_matrix[pos]);
		printk(", ");
	}

	printk("\ncolumn: ");
	for (j = 0; j < mio->N; j++) {
		print_rxw(miou->rxw_column[j]);
		printk(", ");
	}
	printk("\n");

	disp_sc(mio, miou->sc, pbg);
}
#endif