large textfile 27580*1102 cell
Mostrar comentarios más antiguos
fid = fopen('Cancer.txt','r');
data={};
while ~feof(fid)
l=fgetl(fid); %get the lines
if isempty(strfind(l,'NA')), %remove NA rows
else
continue
end
%read next line
idx=regexp(l,'\t','split'); %split the colmuns of this line which don't have NA and look for ';' in every column and split it
[nrow,ncol]=size(idx);
for i=1:ncol
if idx(i)==';' %look for columns which have ';'and split it
split this column into 2 columns and put the second column
into a new row
idx = regexp(idx,';','split')
l=[{l(1:idx-1)}; {[l(1:itab) l(idx+1:end)]}]; %split the line into 2
end
i=i+1;
end
fprintf(fid,l,idx);
end
fid=fclose(fid);
inputs:
Hybridization REF TCGA-A6-2672-11A-01D-1551-05 TCGA-A6-2672-11A-01D-1551-05 TCGA-A6-2672-11A-01D-1551-05
Composite Element REF Beta_value Gene_Symbol Chromosome Genomic_Coordinate Beta_value Gene_Symbol
cg00000292 0.511852232819811 ATP2A1 16 28890100 0.787687855895422 ATP2A1
cg00003994 0.0341977140819682 MEOX2 15725862 0.334815614333325 MEOX2
cg00008493 0.987979722052904 "COX8C;KIAA1409" 14 93813777 0.986128428295584 "COX8C;KIAA1409"
cg00011459 0.922491239231445 "TMEM186;PMM2" 16 8890425 0.961124285303233 "TMEM186;PMM2"
output:
Hybridization REF TCGA-A6-2672-11A-01D-1551-05 TCGA-A6-2672-11A-01D-1551-05 TCGA-A6-2672-11A-01D-1551-05 ……
cg00000292 0.511852232819811 ATP2A1 0.787687855895422
cg00003994 0.0341977140819682 MEOX2 0.334815614333325
cg00008493 0.987979722052904 COX8C 0.986128428295584
cg00008493 0.987979722052904 KIAA1409 0.986128428295584
4 comentarios
chocho
el 21 de Feb. de 2017
chocho
el 21 de Feb. de 2017
Editada: Walter Roberson
el 21 de Feb. de 2017
chocho
el 21 de Feb. de 2017
Respuesta aceptada
Más respuestas (0)
Categorías
Más información sobre Large Files and Big Data en Centro de ayuda y File Exchange.
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!