1. 建立表結構
SET ANSI_NullS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_PADDING ON
GO
CREATE TABLE [dbo].[web_201007](
[date] [Varchar](255) Null,
[time] [Varchar](255) Null,
[s-ip] [Varchar](255) Null,
[cs-method] [Varchar](255) Null,
[cs-uri-stem] [Nvarchar](255) Null,
[cs-uri-query] [Nvarchar](1000) Null,
[s-port] [Varchar](255) Null,
[cs-username] [Varchar](255) Null,
[c-ip] [Varchar](255) Null,
[cs(User-Agent)] [Nvarchar](1000) Null,
[cs(Referer)] [Nvarchar](1000) Null,
[sc-status] [Varchar](255) Null,
[sc-substatus] [Varchar](255) Null,
[sc-win32-status] [Varchar](255) Null,
[time-taken] [Varchar](255) Null
) ON [PRIMARY]
GO
SET ANSI_PADDING OFF
GO
注:a. 由於資料量比較大,所以分表保存,上面表名是7月份的。
b. 表的結構要根據你日誌保存的欄位來創建哦。
2. 使用bcp導入資料
bcp [database].dbo.[web_201007] in "g:\iislog\log.txt" -w -t"*" -r"\r" -U"sa" -P"123456" -S"UKEASSERVER\SQLEXPRESS"
-t:列分隔符號
-r:行分隔符號
-w:使用unicode編碼。如果你日誌裡包含有中文的用-w,如果沒有,用-c就好了,-w會使你的資料庫檔案變很大的。
log.txt是日誌檔,如果有中文,要保存為unicode編碼,不要用utf8啦,不然bcp的時候都是亂碼了,就是說bcp不支援utf8的。沒有中文的話,ANSI就ok了。
注:出現亂碼很噁心的,經常說你的表結構有問題啊,什麼欄位不夠長啊,資料字串被截斷了啊。

 

我的IIS日誌檔都是*.log檔,而且每天一個檔,不是txt檔啊,怎麼辦呢?
自己寫代碼啊,把每個月的資料檔案讀出來,去掉沒有用的行,再保存到log.txt檔裡就好了,這個處理過程比bcp時間快多了。

 

IIS日誌轉換
1 using System;
2 using System.Collections.Generic;
3 using System.Text;
4 using System.IO;
5 using System.Text.RegularExpressions;
6
7 namespace Log2Sql
8 {
9 public class FormatLog
10 {
11 private string inputDirectory;
12 private string outputFileName;
13 private string errorFileName;
14 private string fieldSeparator = "*";
15 private string rowSeparator = "\r";
16 private int fieldLength = 15;
17
18 public FormatLog(string inputDirectory, string outputFileName, string errorFileName)
19 {
20 this.inputDirectory = inputDirectory;
21 this.outputFileName = outputFileName;
22 this.errorFileName = errorFileName;
23 }
24
25 public void Execute()
26 {
27 Regex reg = new Regex("u_ex(\\d{6})\\.log");
28 string[] files = Directory.GetFiles(inputDirectory);
29 List<string> temp = new List<string>();
30 foreach(string f in files){
31 Match match = reg.Match(f);
32 string date = match.Groups[1].Value;
33 if(date.StartsWith("1011")){
34 temp.Add(f);
35 }
36 }
37 files = temp.ToArray();
38 using(FileStream fs = new FileStream(outputFileName, FileMode.Create)) {
39 using(StreamWriter sw = new StreamWriter(fs, Encoding.Unicode)) {
40 foreach(string f in files) {
41 Save(GetRecordLineForFile(f), sw);
42 }
43 }
44 }
45 }
46
47 public void WriteInvalidFields()
48 {
49 using(FileStream fs = new FileStream(errorFileName, FileMode.Create)){
50 using(StreamWriter sw = new StreamWriter(fs, Encoding.UTF8)){
51 using(FileStream fs2 = new FileStream(outputFileName, FileMode.Open)) {
52 using(StreamReader sr = new StreamReader(fs2, Encoding.UTF8)) {
53 string lineData = sr.ReadLine();
54 while(!string.IsNullOrEmpty(lineData)) {
55 List<string> fields = GetInvalidFields(lineData);
56 foreach(string f in fields) {
57 sw.WriteLine(string.Format("{0} {1}", f.Length, f));
58 }
59 lineData = sr.ReadLine();
60 }
61 }
62 }
63 }
64 }
65 }
66
67 private void Save(List<string> datas, StreamWriter sw)
68 {
69 foreach(string s in datas) {
70 sw.Write(s);
71 sw.Write(rowSeparator);
72 }
73 }
74
75 private List<string> GetRecordLineForFile(string fileName)
76 {
77 List<string> list = new List<string>();
78 try{
79 using(FileStream fs = new FileStream(fileName, FileMode.Open)){
80 using(StreamReader sr = new StreamReader(fs, Encoding.UTF8)){
81 string lineData = sr.ReadLine();
82 while(!string.IsNullOrEmpty(lineData)) {
83 if(!lineData.StartsWith("#")) {
84 lineData = lineData.Replace(" ", fieldSeparator);
85 if(lineData.Split(fieldSeparator.ToCharArray()).Length == fieldLength){
86 list.Add(lineData);
87 }
 
88 }
89 lineData = sr.ReadLine();
90 }
91 }
92 }
93 }catch(Exception e){
94 throw e;
95 }
96 return list;
97 }
98
99 private List<string> GetInvalidFields(string linedata)
100 {
101 string[] fields = new string[] { "date", "time", "s-ip", "cs-method", "cs-uri-stem", "cs-uri-query", "s-port", "cs-username", "c-ip", "cs(User-Agent)", "cs(Referer)", "sc-status", "sc-substatus", "sc-win32-status", "time-taken" };
102 string[] datas = linedata.Split(fieldSeparator.ToCharArray());
103
104 List<string> temp = new List<string>();
105 for(int i=0; i<datas.Length; i++) {
106 if(datas[i].Length > 255 && i != 10) {
107 temp.Add(string.Format("{0} {1}",fields[i], datas[i]));
108 }
109 }
110 return temp;
111 }
112
113 public string FormatEncoding(string lineData)
114 {
115 string t = "";
116 Regex reg = new Regex("[\u4e00-\u9fa5]+");
117 MatchCollection matches = reg.Matches(lineData);
118 for(int i = 0; i < matches.Count; i++ ) {
119 t += string.Format("{0},",matches[i].Value);
120 }
121 return t.TrimEnd(',');
122 }
123 }
124 }
125
 
下面是調用:
 
日誌檔轉換
1 namespace Log2Sql
2 {
3 class Program
4 {
5 static void Main(string[] args)
6 {
7 string inputDirectory = @"g:\iislog\logs";
8 string outputFileName = @"g:\iislog\log.txt";
9 string errorFileName = @"g:\iislog\error.txt";
10 try{
11 FormatLog log = new FormatLog(inputDirectory, outputFileName, errorFileName);
12 log.Execute();
13 //log.WriteInvalidFields();
14 }
15 catch(Exception e){
16 Console.Write(e.Message);
17 }
18 Console.Write("success");
19 Console.Read();
20 }
21 }
22 }
 
arrow
arrow
    全站熱搜

    戮克 發表在 痞客邦 留言(0) 人氣()