00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "gn/gnFilter.h"
00014 #include "gn/gnDebug.h"
00015
00016
00017 const gnFilter *gnFilter::alphabetCharacterFilter(){
00018 const static gnFilter* t_filt = new gnFilter(alphabetCharacterFilterType);
00019 return t_filt;
00020 }
00021
00022 const gnFilter *gnFilter::numberCharacterFilter(){
00023 const static gnFilter* t_filt = new gnFilter(numberCharacterFilterType);
00024 return t_filt;
00025 }
00026
00027
00028 const gnFilter *gnFilter::proteinSeqFilter(){
00029 const static gnFilter* t_filt = new gnFilter(proteinSeqFilterType);
00030 return t_filt;
00031 }
00032
00033 const gnFilter *gnFilter::basicDNASeqFilter(){
00034 const static gnFilter* t_filt = new gnFilter(basicDNASeqFilterType);
00035 return t_filt;
00036 }
00037
00038 const gnFilter *gnFilter::fullDNASeqFilter(){
00039 const static gnFilter* t_filt = new gnFilter(fullDNASeqFilterType);
00040 return t_filt;
00041 }
00042
00043 const gnFilter *gnFilter::basicRNASeqFilter(){
00044 const static gnFilter* t_filt = new gnFilter(basicRNASeqFilterType);
00045 return t_filt;
00046 }
00047
00048 const gnFilter *gnFilter::fullRNASeqFilter(){
00049 const static gnFilter* t_filt = new gnFilter(fullRNASeqFilterType);
00050 return t_filt;
00051 }
00052
00053 const gnFilter *gnFilter::DNAtoRNAFilter(){
00054 const static gnFilter* t_filt = new gnFilter(DNAtoRNAFilterType);
00055 return t_filt;
00056 }
00057
00058 const gnFilter *gnFilter::RNAtoDNAFilter(){
00059 const static gnFilter* t_filt = new gnFilter(RNAtoDNAFilterType);
00060 return t_filt;
00061 }
00062
00063 const gnFilter *gnFilter::DNAComplementFilter(){
00064 const static gnFilter* t_filt = new gnFilter(DNAComplementFilterType);
00065 return t_filt;
00066 }
00067
00068 const gnFilter *gnFilter::RNAComplementFilter(){
00069 const static gnFilter* t_filt = new gnFilter(RNAComplementFilterType);
00070 return t_filt;
00071 }
00072
00073
00074
00075 gnFilter::gnFilter()
00076 {
00077 m_defaultChar = 'n';
00078 m_rDefaultChar = 'n';
00079 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00080 m_pairArray[i] = NO_REVCOMP_CHAR;
00081 }
00082 gnFilter::gnFilter( const gnSeqC defaultChar, const gnSeqC rdefaultChar )
00083 {
00084 m_defaultChar = defaultChar;
00085 m_rDefaultChar = rdefaultChar;
00086 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00087 m_pairArray[i] = NO_REVCOMP_CHAR;
00088 }
00089
00090 gnFilter::gnFilter( const gnFilter &sf )
00091 {
00092 m_name = sf.m_name;
00093 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00094 m_pairArray[i] = sf.m_pairArray[i];
00095 m_defaultChar = sf.m_defaultChar;
00096 m_rDefaultChar = sf.m_rDefaultChar;
00097 }
00098
00099 gnFilter::gnFilter( const gnFilterType f_type ){
00100 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00101 m_pairArray[i] = NO_REVCOMP_CHAR;
00102 switch(f_type){
00103 case alphabetCharacterFilterType:
00104 CreateAlphabetCharacterFilter();
00105 break;
00106 case numberCharacterFilterType:
00107 CreateNumberCharacterFilter();
00108 break;
00109 case proteinSeqFilterType:
00110 CreateProteinFilter();
00111 break;
00112 case basicDNASeqFilterType:
00113 CreateBasicDNAFilter();
00114 break;
00115 case fullDNASeqFilterType:
00116 CreateFullDNAFilter();
00117 break;
00118 case basicRNASeqFilterType:
00119 CreateBasicRNAFilter();
00120 break;
00121 case fullRNASeqFilterType:
00122 CreateFullRNAFilter();
00123 break;
00124 case DNAtoRNAFilterType:
00125 CreateDNAtoRNAFilter();
00126 break;
00127 case RNAtoDNAFilterType:
00128 CreateRNAtoDNAFilter();
00129 break;
00130 case DNAComplementFilterType:
00131 CreateDNAComplementFilter();
00132 break;
00133 case RNAComplementFilterType:
00134 CreateRNAComplementFilter();
00135 break;
00136 }
00137 }
00138
00139
00140 gnFilter::~gnFilter()
00141 {
00142 }
00143
00144 inline
00145 void gnFilter::Filter( gnSeqC** seq, uint32& len ) const
00146 {
00147 Array<gnSeqC> array_buf( len );
00148 gnSeqC* tmp = array_buf.data;
00149 gnSeqI c=0;
00150 for(uint32 i=0; i < len; i++)
00151 if(IsValid((*seq)[i]))
00152 tmp[c++] = m_pairArray[(*seq)[i]];
00153 len = c;
00154 memcpy(*seq, tmp, len);
00155 }
00156
00157 void gnFilter::ReverseFilter( gnSeqC** seq, uint32& len ) const
00158 {
00159 gnSeqC tmp, dum;
00160 uint32 halfLen = len/2;
00161 uint32 end = len - 1;
00162 uint32 curB = 0;
00163 uint32 curE = end;
00164 for( uint32 i=0; i < halfLen ; ++i )
00165 {
00166 tmp = m_pairArray[(*seq)[i]];
00167 dum = m_pairArray[(*seq)[ end - i ]];
00168 if(dum != NO_REVCOMP_CHAR)
00169 (*seq)[ curB++ ] = dum;
00170 if(tmp != NO_REVCOMP_CHAR)
00171 (*seq)[ curE-- ] = tmp;
00172 }
00173 if(len&0x1){
00174 tmp = m_pairArray[(*seq)[halfLen]];
00175 if(tmp != NO_REVCOMP_CHAR)
00176 (*seq)[curB++] = tmp;
00177 }
00178
00179 if(curE >= curB){
00180 memmove(*seq+curB, *seq+curE+1, end - curE);
00181 len = end - curE + curB;
00182 }
00183
00184 }
00185
00186 void gnFilter::Filter( string &seq ) const
00187 {
00188 gnSeqI c=0;
00189 for(uint32 i=0; i < seq.length(); i++)
00190 if(IsValid(seq[i]))
00191 seq[c++] = m_pairArray[seq[i]];
00192 }
00193
00194 void gnFilter::ReverseFilter( string &seq ) const
00195 {
00196 gnSeqC tmp, dum;
00197 uint32 halfLen = seq.length()/2;
00198 uint32 end = seq.length() - 1;
00199 uint32 curB = 0;
00200 uint32 curE = end;
00201 for( uint32 i=0; i < halfLen ; ++i )
00202 {
00203 tmp = m_pairArray[seq[i]];
00204 dum = m_pairArray[seq[ end - i ]];
00205 if(dum != NO_REVCOMP_CHAR)
00206 seq[ curB++ ] = dum;
00207 if(tmp != NO_REVCOMP_CHAR)
00208 seq[ curE-- ] = tmp;
00209 }
00210 if(seq.length()&0x1){
00211 tmp = m_pairArray[seq[halfLen]];
00212 if(tmp != NO_REVCOMP_CHAR)
00213 seq[curB++] = tmp;
00214 }
00215
00216 if(curE >= curB){
00217 seq.erase(curB, curE-curB);
00218 }
00219 }
00220
00221
00222 void gnFilter::CreateAlphabetCharacterFilter()
00223 {
00224 SetDefaultChar( 0, 0 );
00225 SetName( "Alphabet Character Filter" );
00226 SetPair( 'A', 'a' );
00227 SetPair( 'B', 'b' );
00228 SetPair( 'C', 'c' );
00229 SetPair( 'D', 'd' );
00230 SetPair( 'E', 'e' );
00231 SetPair( 'F', 'f' );
00232 SetPair( 'G', 'g' );
00233 SetPair( 'H', 'h' );
00234 SetPair( 'I', 'i' );
00235 SetPair( 'J', 'j' );
00236 SetPair( 'K', 'k' );
00237 SetPair( 'L', 'l' );
00238 SetPair( 'M', 'm' );
00239 SetPair( 'N', 'n' );
00240 SetPair( 'O', 'o' );
00241 SetPair( 'P', 'p' );
00242 SetPair( 'Q', 'q' );
00243 SetPair( 'R', 'r' );
00244 SetPair( 'S', 's' );
00245 SetPair( 'T', 't' );
00246 SetPair( 'U', 'u' );
00247 SetPair( 'V', 'v' );
00248 SetPair( 'W', 'w' );
00249 SetPair( 'X', 'x' );
00250 SetPair( 'Y', 'y' );
00251 SetPair( 'Z', 'z' );
00252 }
00253
00254 void gnFilter::CreateNumberCharacterFilter()
00255 {
00256 SetDefaultChar( 0, 0 );
00257 SetName( "Number Character Filter" );
00258 SetSingle( '0' );
00259 SetSingle( '1' );
00260 SetSingle( '2' );
00261 SetSingle( '3' );
00262 SetSingle( '4' );
00263 SetSingle( '5' );
00264 SetSingle( '6' );
00265 SetSingle( '7' );
00266 SetSingle( '8' );
00267 SetSingle( '9' );
00268 }
00269
00270 void gnFilter::CreateProteinFilter()
00271 {
00272 SetDefaultChar( 'u', 'u' );
00273 SetName( "Protein Filter" );
00274 SetSingle( 'A' );
00275 SetSingle( 'R' );
00276 SetSingle( 'N' );
00277 SetSingle( 'D' );
00278 SetSingle( 'C' );
00279 SetSingle( 'Q' );
00280 SetSingle( 'E' );
00281 SetSingle( 'G' );
00282 SetSingle( 'H' );
00283 SetSingle( 'I' );
00284 SetSingle( 'L' );
00285 SetSingle( 'K' );
00286 SetSingle( 'M' );
00287 SetSingle( 'F' );
00288 SetSingle( 'P' );
00289 SetSingle( 'S' );
00290 SetSingle( 'T' );
00291 SetSingle( 'W' );
00292 SetSingle( 'Y' );
00293 SetSingle( 'V' );
00294
00295 SetSingle( 'a' );
00296 SetSingle( 'r' );
00297 SetSingle( 'n' );
00298 SetSingle( 'd' );
00299 SetSingle( 'c' );
00300 SetSingle( 'q' );
00301 SetSingle( 'e' );
00302 SetSingle( 'g' );
00303 SetSingle( 'h' );
00304 SetSingle( 'i' );
00305 SetSingle( 'l' );
00306 SetSingle( 'k' );
00307 SetSingle( 'm' );
00308 SetSingle( 'f' );
00309 SetSingle( 'p' );
00310 SetSingle( 's' );
00311 SetSingle( 't' );
00312 SetSingle( 'w' );
00313 SetSingle( 'y' );
00314 SetSingle( 'v' );
00315 }
00316
00317 void gnFilter::CreateBasicDNAFilter()
00318 {
00319 SetDefaultChar( 'n', 'n' );
00320 SetName( "Basic DNA Filter" );
00321 SetSingle( 'a' );
00322 SetSingle( 'c' );
00323 SetSingle( 'g' );
00324 SetSingle( 't' );
00325 SetSingle( 'A' );
00326 SetSingle( 'C' );
00327 SetSingle( 'G' );
00328 SetSingle( 'T' );
00329 SetSingle( 'n' );
00330 SetSingle( 'N' );
00331 SetSingle( 'x' );
00332 SetSingle( 'X' );
00333 SetSingle( '-' );
00334 }
00335 void gnFilter::CreateFullDNAFilter()
00336 {
00337 SetDefaultChar( 'n', 'n' );
00338 SetName( "Full DNA Filter" );
00339 SetSingle( 'a' );
00340 SetSingle( 'c' );
00341 SetSingle( 'g' );
00342 SetSingle( 't' );
00343 SetSingle( 'A' );
00344 SetSingle( 'C' );
00345 SetSingle( 'G' );
00346 SetSingle( 'T' );
00347 SetSingle( 'r' );
00348 SetSingle( 'y' );
00349 SetSingle( 'k' );
00350 SetSingle( 'm' );
00351 SetSingle( 'b' );
00352 SetSingle( 'v' );
00353 SetSingle( 'd' );
00354 SetSingle( 'h' );
00355 SetSingle( 'R' );
00356 SetSingle( 'Y' );
00357 SetSingle( 'K' );
00358 SetSingle( 'M' );
00359 SetSingle( 'B' );
00360 SetSingle( 'V' );
00361 SetSingle( 'D' );
00362 SetSingle( 'H' );
00363 SetSingle( 's' );
00364 SetSingle( 'S' );
00365 SetSingle( 'w' );
00366 SetSingle( 'W' );
00367 SetSingle( 'n' );
00368 SetSingle( 'N' );
00369 SetSingle( 'x' );
00370 SetSingle( 'X' );
00371 SetSingle( '-' );
00372 }
00373 void gnFilter::CreateBasicRNAFilter()
00374 {
00375 SetDefaultChar( 'n', 'n' );
00376 SetName( "Basic RNA Filter" );
00377 SetSingle( 'a' );
00378 SetSingle( 'c' );
00379 SetSingle( 'g' );
00380 SetSingle( 'u' );
00381 SetSingle( 'A' );
00382 SetSingle( 'C' );
00383 SetSingle( 'G' );
00384 SetSingle( 'U' );
00385 SetSingle( 'n' );
00386 SetSingle( 'N' );
00387 SetSingle( '-' );
00388 }
00389 void gnFilter::CreateFullRNAFilter()
00390 {
00391 SetDefaultChar( 'n', 'n' );
00392 SetName( "Full RNA Filter" );
00393 SetSingle( 'a' );
00394 SetSingle( 'c' );
00395 SetSingle( 'g' );
00396 SetSingle( 'u' );
00397 SetSingle( 'A' );
00398 SetSingle( 'C' );
00399 SetSingle( 'G' );
00400 SetSingle( 'U' );
00401 SetSingle( 'r' );
00402 SetSingle( 'y' );
00403 SetSingle( 'k' );
00404 SetSingle( 'm' );
00405 SetSingle( 'b' );
00406 SetSingle( 'v' );
00407 SetSingle( 'd' );
00408 SetSingle( 'h' );
00409 SetSingle( 'R' );
00410 SetSingle( 'Y' );
00411 SetSingle( 'K' );
00412 SetSingle( 'M' );
00413 SetSingle( 'B' );
00414 SetSingle( 'V' );
00415 SetSingle( 'D' );
00416 SetSingle( 'H' );
00417 SetSingle( 's' );
00418 SetSingle( 'S' );
00419 SetSingle( 'w' );
00420 SetSingle( 'W' );
00421 SetSingle( 'n' );
00422 SetSingle( 'N' );
00423 SetSingle( '-' );
00424 }
00425
00426
00427 void gnFilter::CreateDNAtoRNAFilter(){
00428 SetDefaultChar( 'n', 'n' );
00429 SetName( "Full DNA to RNA Filter" );
00430 SetSingle( 'a' );
00431 SetSingle( 'c' );
00432 SetSingle( 'g' );
00433 SetPair( 't', 'u' );
00434 SetSingle( 'A' );
00435 SetSingle( 'C' );
00436 SetSingle( 'G' );
00437 SetPair( 'T', 'U' );
00438 SetSingle( 'r' );
00439 SetSingle( 'y' );
00440 SetSingle( 'k' );
00441 SetSingle( 'm' );
00442 SetSingle( 'b' );
00443 SetSingle( 'v' );
00444 SetSingle( 'd' );
00445 SetSingle( 'h' );
00446 SetSingle( 'R' );
00447 SetSingle( 'Y' );
00448 SetSingle( 'K' );
00449 SetSingle( 'M' );
00450 SetSingle( 'B' );
00451 SetSingle( 'V' );
00452 SetSingle( 'D' );
00453 SetSingle( 'H' );
00454 SetSingle( 's' );
00455 SetSingle( 'S' );
00456 SetSingle( 'w' );
00457 SetSingle( 'W' );
00458 SetSingle( 'n' );
00459 SetSingle( 'N' );
00460 SetSingle( '-' );
00461 }
00462
00463 void gnFilter::CreateRNAtoDNAFilter(){
00464 SetDefaultChar( 'n', 'n' );
00465 SetName( "Full RNA to DNA Filter" );
00466 SetSingle( 'a' );
00467 SetSingle( 'c' );
00468 SetSingle( 'g' );
00469 SetPair( 'u', 't' );
00470 SetSingle( 'A' );
00471 SetSingle( 'C' );
00472 SetSingle( 'G' );
00473 SetPair( 'U', 'T' );
00474 SetSingle( 'r' );
00475 SetSingle( 'y' );
00476 SetSingle( 'k' );
00477 SetSingle( 'm' );
00478 SetSingle( 'b' );
00479 SetSingle( 'v' );
00480 SetSingle( 'd' );
00481 SetSingle( 'h' );
00482 SetSingle( 'R' );
00483 SetSingle( 'Y' );
00484 SetSingle( 'K' );
00485 SetSingle( 'M' );
00486 SetSingle( 'B' );
00487 SetSingle( 'V' );
00488 SetSingle( 'D' );
00489 SetSingle( 'H' );
00490 SetSingle( 's' );
00491 SetSingle( 'S' );
00492 SetSingle( 'w' );
00493 SetSingle( 'W' );
00494 SetSingle( 'n' );
00495 SetSingle( 'N' );
00496 SetSingle( '-' );
00497 }
00498
00499 void gnFilter::CreateDNAComplementFilter(){
00500 SetDefaultChar( 'n', 'n' );
00501 SetName( "Full DNA Complement Filter" );
00502 SetPair( 'a', 't' );
00503 SetPair( 'A', 'T' );
00504 SetPair( 't', 'a' );
00505 SetPair( 'T', 'A' );
00506 SetPair( 'c', 'g' );
00507 SetPair( 'C', 'G' );
00508 SetPair( 'g', 'c' );
00509 SetPair( 'G', 'C' );
00510 SetPair( 'r', 'y' );
00511 SetPair( 'R', 'Y' );
00512 SetPair( 'y', 'r' );
00513 SetPair( 'Y', 'R' );
00514 SetPair( 'k', 'm' );
00515 SetPair( 'K', 'M' );
00516 SetPair( 'm', 'k' );
00517 SetPair( 'M', 'K' );
00518 SetSingle( 's' );
00519 SetSingle( 'S' );
00520 SetSingle( 'w' );
00521 SetSingle( 'W' );
00522 SetPair( 'b', 'v' );
00523 SetPair( 'B', 'V' );
00524 SetPair( 'v', 'b' );
00525 SetPair( 'V', 'B' );
00526 SetPair( 'd', 'h' );
00527 SetPair( 'D', 'H' );
00528 SetPair( 'h', 'd' );
00529 SetPair( 'H', 'D' );
00530 SetSingle( 'n' );
00531 SetSingle( 'N' );
00532 SetSingle( 'x' );
00533 SetSingle( 'X' );
00534 SetSingle( '-' );
00535 }
00536
00537 void gnFilter::CreateRNAComplementFilter(){
00538 SetDefaultChar( 'n', 'n' );
00539 SetName( "Full RNA Complement Filter" );
00540 SetPair( 'a', 'u' );
00541 SetPair( 'A', 'U' );
00542 SetPair( 'u', 'a' );
00543 SetPair( 'U', 'A' );
00544 SetPair( 'c', 'g' );
00545 SetPair( 'C', 'G' );
00546 SetPair( 'g', 'c' );
00547 SetPair( 'G', 'C' );
00548 SetPair( 'r', 'y' );
00549 SetPair( 'R', 'Y' );
00550 SetPair( 'y', 'r' );
00551 SetPair( 'Y', 'R' );
00552 SetPair( 'k', 'm' );
00553 SetPair( 'K', 'M' );
00554 SetPair( 'm', 'k' );
00555 SetPair( 'M', 'K' );
00556 SetSingle( 's' );
00557 SetSingle( 'S' );
00558 SetSingle( 'w' );
00559 SetSingle( 'W' );
00560 SetPair( 'b', 'v' );
00561 SetPair( 'B', 'V' );
00562 SetPair( 'v', 'b' );
00563 SetPair( 'V', 'B' );
00564 SetPair( 'd', 'h' );
00565 SetPair( 'D', 'H' );
00566 SetPair( 'h', 'd' );
00567 SetPair( 'H', 'D' );
00568 SetSingle( 'n' );
00569 SetSingle( 'N' );
00570 SetSingle( '-' );
00571 }