C#写的一段解析 CSV 文件的代码 选择自 redv 的 Blog
作者: 火车头 日期: 2006-10-27 14:14
我们经常将 Excel 格式的文件保存为 CSV 格式以方便上传和修改，可是当数据中包含逗号或双引号的时候，Excel 会把该字段用双引号括住，并把数据中的 " 改为 ""。
下载: csv.cs
using System;
using System.Text;
/*
 * The Comma Separated Value (CSV) File Format:
 *   http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
 * Description: parses CSV-formatted text.
 * Adapted from the Java code at
 *   http://blog.csdn.net/emu/archive/2003/03/01/16338.aspx
 * (row handling added).
 * Date: 2004-10-22 14:55
 */
namespace Mitumori {
    /// <summary>
    /// Splits CSV text into a rectangular two-dimensional string array.
    /// </summary>
    /// <remarks>
    /// Fields are separated by commas. A field that starts with a double quote
    /// may contain commas and line breaks, and represents a literal double
    /// quote as two consecutive double quotes. CR, LF, CRLF and LFCR are all
    /// accepted as row separators.
    /// </remarks>
    public class CSVUtil {
        // Static-only utility class; the private constructor prevents instantiation.
        private CSVUtil() {
        }

        /// <summary>
        /// Splits CSV content into rows and columns.
        /// </summary>
        /// <param name="src">The CSV content as a single string.</param>
        /// <returns>
        /// One array per row. Every row has the same length (the widest row's
        /// column count); shorter rows are padded with empty strings. A null or
        /// empty input yields an array with zero rows.
        /// </returns>
        /// <exception cref="System.FormatException">
        /// A double quote appears inside an unquoted field, a closing quote is
        /// followed by something other than a separator, or the input ends
        /// inside a quoted field.
        /// </exception>
        public static String[][] SplitCSV(String src) {
            if (src == null || src.Length == 0) {
                return new String[0][];
            }

            System.Collections.ArrayList rows = new System.Collections.ArrayList();  // elements are ArrayList rows
            System.Collections.ArrayList cells = new System.Collections.ArrayList(); // cells of the current row
            StringBuilder field = new StringBuilder();                               // text of the current cell
            bool inQuotes = false;     // currently between an opening and a closing quote
            bool fieldQuoted = false;  // the current cell was opened with a quote (it may be empty)

            for (int i = 0; i < src.Length; i++) {
                char ch = src[i];

                if (inQuotes) {
                    if (ch != '"') {
                        // Commas and line breaks are ordinary data inside quotes.
                        field.Append(ch);
                        continue;
                    }
                    bool hasNext = i + 1 < src.Length;
                    if (hasNext && src[i + 1] == '"') {
                        // A doubled quote is an escaped literal quote.
                        field.Append('"');
                        i++;
                        continue;
                    }
                    if (hasNext && src[i + 1] != ',' && src[i + 1] != '\r' && src[i + 1] != '\n') {
                        throw new FormatException("Single double-quote char mustn't exist in filed "
                            + (cells.Count + 1) + " while it is begined with quote char at:" + (i + 1));
                    }
                    // Closing quote; the separator (or end of input) that follows ends the cell.
                    inQuotes = false;
                    continue;
                }

                if (ch == '\r' || ch == '\n') {
                    // Treat CRLF and LFCR as a single row separator.
                    char partner = (ch == '\r') ? '\n' : '\r';
                    if (i + 1 < src.Length && src[i + 1] == partner) {
                        i++;
                    }
                    cells.Add(field.ToString());
                    field.Length = 0;
                    fieldQuoted = false;
                    rows.Add(cells);
                    cells = new System.Collections.ArrayList();
                }
                else if (ch == '"') {
                    if (field.Length != 0) {
                        throw new FormatException("Quote cannot exist in a filed which doesn't begin with quote! field:" + (cells.Count + 1));
                    }
                    inQuotes = true;
                    fieldQuoted = true;
                }
                else if (ch == ',') {
                    cells.Add(field.ToString());
                    field.Length = 0;
                    fieldQuoted = false;
                }
                else {
                    field.Append(ch);
                }
            }

            if (inQuotes) {
                throw new FormatException("last field is begin with but not end with double quote");
            }

            // Flush the last row when the input does not end with a line break.
            // cells.Count > 0 covers a trailing empty field ("a,"), and
            // fieldQuoted covers a trailing empty quoted field.
            if (cells.Count > 0 || field.Length > 0 || fieldQuoted) {
                cells.Add(field.ToString());
                rows.Add(cells);
            }

            return ToRectangularArray(rows);
        }

        // Converts the parsed rows into a jagged array whose rows all share the
        // widest row's length, padding missing cells with empty strings.
        private static String[][] ToRectangularArray(System.Collections.ArrayList rows) {
            int maxColumns = 0;
            foreach (System.Collections.ArrayList row in rows) {
                if (row.Count > maxColumns) {
                    maxColumns = row.Count;
                }
            }

            String[][] result = new String[rows.Count][];
            for (int i = 0; i < result.Length; i++) {
                System.Collections.ArrayList row = (System.Collections.ArrayList) rows[i];
                String[] line = new String[maxColumns];
                for (int j = 0; j < maxColumns; j++) {
                    line[j] = (j < row.Count) ? (String) row[j] : "";
                }
                result[i] = line;
            }
            return result;
        }

        /// <summary>
        /// Manual demo: parses a sample line containing quoted commas and
        /// doubled quotes, then prints each cell on its own line.
        /// </summary>
        /// <param name="args">Unused.</param>
        public static void aMain(String[] args) {
            String src1 = "\"fh,zg\",sdf,\"asfs,\",\",dsdf\","
                + "\"aadf\"\"\",\"\"\"hdfg\",\"fgh\"\"dgnh\","
                + "hgfg'dfh,\"asdfa\"\"\"\"\",\"\"\"\"\"fgjhg\","
                + "\"gfhg\"\"\"\"hb\"\n";
            try {
                String[][] Ret = SplitCSV(src1);
                for (int i = 0; i < Ret.Length; i++) {
                    for (int j = 0; j < Ret[i].Length; j++) {
                        System.Console.WriteLine(Ret[i][j]);
                    }
                    System.Console.WriteLine();
                }
            }
            catch (Exception e) {
                System.Console.WriteLine(e.StackTrace);
            }
        }
    }
}
评论: 0 |
引用: 0 |
阅读: 2260
发表评论
订阅
上一篇
返回
下一篇