fasta矩阵组合脚本
时间:2014-05-11 02:02:27
收藏:0
阅读:372
又是给女朋友做的……没有需求就没有动力这真的是一个合格的程序员么喂。
因为是在Windows下用,不能用bash也不能写Python,只好写成VBS。
第一次用VBS,这货比起Python真是难用得要命,微软太不争气了。不过也算是学了一门新技能吧。
其实这脚本一点也不健壮,文件数量过多时会堆栈溢出,文件名太长会报错,文件太大倒是没测过,想必也有问题。
她已经用完了,咱也没动力改了。有需求请自取。
‘Author by alex_lei@163.com
‘用于对当前路径下多个.fasta或.fas文件按矩阵进行组合,结果存放在result目录中
‘示例:当前路径下有A.fasta、B.fasta和C.fasta三个文件
‘ 运行脚本后,result目录中将生成A+B.fas、B+C.fas、A+C.fas、A+B+C.fas四个文件
‘ 其中,A+B.fas的每一个矩阵都由A.fasta和B.fasta的对应矩阵拼合而成,以此类推
‘ 结果矩阵的名称将采用第一个源矩阵的名称
‘限制:请保证每个文件中矩阵的顺序相同且格式正确(每个矩阵以">矩阵名"开头)。
‘主函数入口
Dim fso
Set fso = CreateObject("Scripting.FileSystemObject")
fso.CreateFolder(".\result")
ForEachPair GetFileName
‘获取文件扩展名
Private Function GetFileExt(ByVal sFileName)
Dim P
For P = Len(sFileName) To 1 Step -1
If InStr(".", Mid(sFileName, P, 1)) Then
Exit For
End If
Next
GetFileExt = Right(sFileName, Len(sFileName) - P)
End Function
‘获取除了路径和扩展名的主文件名
Private Function GetFileMain(ByVal sFilename)
Dim P, Q
For P = Len(sFileName) To 1 Step -1
If InStr(".", Mid(sFileName, P, 1)) Then
Exit For
End If
Next
For Q = P To 1 Step -1
If InStr("\", Mid(sFileName, Q, 1)) Then
Exit For
End If
Next
GetFileMain = Mid(sFileName, Q+1, P-Q-1)
End Function
‘获取所有.fasta文件的完整文件名
Private Function GetFileName
Dim fso, path, filelist, fastafiles()
Dim i, f, arrsize
Set fso = CreateObject("Scripting.FileSystemObject")
Set path = fso.GetFolder(".")
Set filelist = path.Files
i = 0
arrsize = 1
Redim fastafiles(arrsize)
For each f in filelist
If GetFileExt(f) = "fasta" Or GetFileExt(f) = "fas" Then
If i = arrsize Then
arrsize = arrsize + 1
Redim Preserve fastafiles(arrsize - 1)
End If
fastafiles(i) = f
i = i + 1
End If
Next
If i = 0 Then
MsgBox("没有找到.fasta文件,请检查本文件所在路径")
WScript.Quit
End If
GetFileName = fastafiles
End Function
‘将一个文件拆分成若干个sample
Private Function SplitSample(ByVal filename)
Dim fso, f1, content, array, newarray, i
Set fso = CreateObject("Scripting.FileSystemObject")
Set f1 = fso.OpenTextFile(filename, 1)
content = f1.ReadAll
f1.Close
array = Split(content, ">", -1, vbTextCompare)
If array(0) = "" Then
Redim newarray(UBound(array) - 1)
For i = 1 To UBound(array)
newarray(i-1) = array(i)
Next
Else
newarray = array
End If
SplitSample = newarray
End Function
‘将两个sample合并为一个,只保留第一个sample的标题
Private Function JoinSample(ByVal sample1, ByVal sample2)
Dim lnary1, lnary2, tarary, i, j
lnary1 = Split(sample1, vbNewLine, -1, vbTextCompare)
lnary2 = Split(sample2, vbNewLine, -1, vbTextCompare)
Redim tarary(UBound(lnary1) + UBound(lnary2) + 1)
For i = 0 To UBound(lnary1)
tarary(i) = lnary1(i)
Next
For j = i + 1 To i + UBound(lnary2)
‘不合并第二个sample的第一行,因为也是标题
tarary(j) = lnary2(j - i)
Next
tarary(0) = ">" & tarary(0) & vbNewLine
JoinSample = Join(tarary, "")
End Function
‘将两个文件合并到一个文件中
Private Function JoinFile(ByVal file1, ByVal file2)
Dim tarary, ary1, ary2, tarfile, fso, fp, i, maxelem, sample
‘MsgBox file1 & "+" & file2
tarfile = ".\result\" & GetFileMain(file1) & "+" & GetFileMain(file2) & ".fas"
Set fso = CreateObject("Scripting.FileSystemObject")
If fso.FileExists(tarfile) Then
JoinFile = tarfile
Exit Function
End If
ary1 = SplitSample(file1)
ary2 = SplitSample(file2)
If UBound(ary1) <> UBound(ary2) Then
MsgBox(file1 & "的样本数" & UBound(ary1) & "与" & file2 & "的样本数" & UBound(ary2) & "不相等")
WScript.Quit
End If
maxelem = UBound(ary1)
Redim tarary(maxelem)
For i = 0 To maxelem
tarary(i) = JoinSample(ary1(i), ary2(i))
Next
‘写入文件
Set fp = fso.CreateTextFile(tarfile, True)
For each sample in tarary
‘因为sample末尾没有换行,此处输出换行
fp.WriteLine(sample)
Next
fp.Close
JoinFile = tarfile
End Function
‘对文件进行排列组合
Private Function ForEachPair(ByVal list)
Dim i, j, k, newlist, combfile
If UBound(list) = 1 Then
‘只有两个元素则结束递归
ForEachPair = JoinFile(list(0), list(1))
Else
Redim newlist(UBound(list) - 1)
For i = 0 To UBound(list)
k = 0
For j = 0 To UBound(list)
If i <> j Then
newlist(k) = list(j)
‘MsgBox("list(" & j & ") " & list(j) & "newlist(" & k & ") " & newlist(k))
k = k + 1
End If
Next
‘MsgBox(Join(newlist, " | "))
If i = 0 Then
combfile = ForEachPair(newlist)
Else
ForEachPair newlist
End If
Next
‘因为不考虑排列,只需用第一个和后面的组合在一起就可以
ForEachPair = JoinFile(list(0), combfile)
End If
End Function
评论(0)