经验首页 前端设计 程序设计 Java相关 移动开发 数据库/运维 软件/图像 大数据/云计算 其他经验
当前位置:技术经验 » 程序设计 » Python » 查看文章
C# pythonnet(1)_传感器数据清洗算法
来源:cnblogs  作者:Karl_Albright  时间:2024/6/25 8:51:33  对本文有异议

Python代码如下

  import pandas as pd


  def detect_outliers(data):
      """Return the row labels of ``data`` that hold an IQR outlier in any numeric column.

      For each numeric column the first and third quartiles (q1, q3) are
      computed with ``Series.quantile``; a value is an outlier when it lies
      below ``q1 - 1.5 * iqr`` or above ``q3 + 1.5 * iqr`` (``iqr = q3 - q1``).

      Parameters:
          data: pandas DataFrame to inspect. It is not modified.

      Returns:
          A sorted list of unique index labels; each label identifies a row
          with at least one outlying value.
      """
      outliers = set()
      # Quartiles are only defined for numeric data, so text/object columns
      # are skipped instead of raising inside quantile() or the comparisons.
      for col in data.select_dtypes(include="number").columns:
          q1 = data[col].quantile(0.25)
          q3 = data[col].quantile(0.75)
          iqr = q3 - q1
          lower_bound = q1 - 1.5 * iqr
          upper_bound = q3 + 1.5 * iqr
          out_of_range = (data[col] < lower_bound) | (data[col] > upper_bound)
          outliers.update(data[out_of_range].index)
      # Sorting makes the result deterministic (a bare set has arbitrary order).
      return sorted(outliers)


  if __name__ == "__main__":
      # Load the raw sensor data.
      data = pd.read_csv('data_row.csv')
      # Find the rows that contain outliers.
      outliers = detect_outliers(data)
      print("异常数据数量:", len(outliers))
      # Remove those rows in place.
      data.drop(outliers, inplace=True)
      # Save the cleaned data without the index column.
      data.to_csv('clean_data_row.csv', index=False)

下面我们修改成C#代码

创建控制台程序,通过 NuGet 安装 CsvHelper 和 pythonnet

  /// <summary>
  /// Console demo that reads sensor rows from a CSV file, removes rows holding
  /// IQR outliers, and plots the original and cleaned data side by side with
  /// matplotlib through pythonnet.
  /// </summary>
  public class Program
  {
      const string PathToPythonDir = "D:\\Python311";
      const string DllOfPython = "python311.dll";

      static void Main(string[] args)
      {
          // Data cleaning
          CleanData();
      }

      /// <summary>
      /// Reads "data_row.csv", drops the rows flagged by <see cref="DetectOutliers"/>,
      /// then plots the original rows and the remaining rows in two subplots.
      /// Any exception raised while talking to Python is written to the console.
      /// </summary>
      static void CleanData()
      {
          var originDatas = ReadCsvWithCsvHelper("data_row.csv");
          var outlierHashset = new HashSet<int>(DetectOutliers(originDatas));
          // Rows that survive the cleaning step.
          var cleanDatas = originDatas.Where((r, index) => !outlierHashset.Contains(index)).ToList();
          try
          {
              Runtime.PythonDLL = Path.Combine(PathToPythonDir, DllOfPython);
              PythonEngine.Initialize();
              using (Py.GIL())
              {
                  // Only numpy and matplotlib are needed for plotting; the unused
                  // pandas / scipy.fftpack imports were removed so the demo does
                  // not fail when those packages are missing.
                  dynamic np = Py.Import("numpy");
                  dynamic plt = Py.Import("matplotlib.pyplot");

                  dynamic oData = np.array(originDatas.ToArray());
                  int oDataLength = oData.__len__();
                  dynamic data = np.array(cleanDatas.ToArray());
                  int dataLength = data.__len__();

                  // One figure with two subplots: original vs. cleaned data.
                  plt.figure(figsize: new dynamic[] { 12, 6 });

                  // Original data
                  plt.subplot(1, 2, 1);
                  plt.plot(np.arange(oDataLength), oData);
                  plt.title("Original Datas");

                  // Cleaned data
                  plt.subplot(1, 2, 2);
                  plt.plot(np.arange(dataLength), data);
                  plt.title("Clean Datas");

                  // Adjust the layout so the subplots do not overlap.
                  plt.tight_layout();
                  // Show the chart.
                  plt.show();
              }
          }
          catch (Exception e)
          {
              Console.WriteLine("报错了:" + e.Message + "\r\n" + e.StackTrace);
          }
      }

      /// <summary>
      /// Detects outliers column by column with the 1.5 * IQR rule.
      /// </summary>
      /// <param name="datas">Original rows; the column count is taken from the first row.</param>
      /// <returns>
      /// Distinct indices (positions in <paramref name="datas"/>) of rows that have at
      /// least one value outside [q1 - 1.5 * iqr, q3 + 1.5 * iqr]. Empty when there is no data.
      /// </returns>
      static List<int> DetectOutliers(List<double[]> datas)
      {
          var outliers = new List<int>();
          if (datas == null || datas.Count == 0)
          {
              return outliers;
          }

          int columnCount = datas[0].Length;
          for (int i = 0; i < columnCount; i++)
          {
              int column = i;
              // Sort the column once and derive both quartiles from the same array.
              double[] sorted = datas.Select(row => row[column]).OrderBy(v => v).ToArray();
              double q1 = Quantile(sorted, 0.25);
              double q3 = Quantile(sorted, 0.75);
              double iqr = q3 - q1;
              double lowerBound = q1 - 1.5 * iqr;
              double upperBound = q3 + 1.5 * iqr;

              for (int rowIndex = 0; rowIndex < datas.Count; rowIndex++)
              {
                  double value = datas[rowIndex][column];
                  if (value < lowerBound || value > upperBound)
                  {
                      outliers.Add(rowIndex);
                  }
              }
          }
          return outliers.Distinct().ToList();
      }

      /// <summary>
      /// Quantile of an ascending, non-empty array using linear interpolation between
      /// the two closest ranks, so the quartiles match the interpolation used by the
      /// Python version of this algorithm (pandas' default).
      /// </summary>
      /// <param name="sorted">Values sorted in ascending order; must not be empty.</param>
      /// <param name="p">Quantile to compute, between 0 and 1.</param>
      /// <returns>The interpolated quantile value.</returns>
      static double Quantile(double[] sorted, double p)
      {
          double position = (sorted.Length - 1) * p;
          int lower = (int)Math.Floor(position);
          int upper = Math.Min(lower + 1, sorted.Length - 1);
          double fraction = position - lower;
          return sorted[lower] + (sorted[upper] - sorted[lower]) * fraction;
      }

      /// <summary>
      /// Reads the first three columns of a CSV file that has a header row.
      /// </summary>
      /// <param name="filePath">Path of the CSV file.</param>
      /// <returns>One double[3] per data row; empty when the file has no rows at all.</returns>
      static List<double[]> ReadCsvWithCsvHelper(string filePath)
      {
          using (var reader = new StreamReader(filePath))
          using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
          {
              var result = new List<double[]>();
              // The first record is the header row. An entirely empty file has no
              // record to read, so return early instead of letting ReadHeader throw.
              if (!csv.Read())
              {
                  return result;
              }
              csv.ReadHeader();
              while (csv.Read())
              {
                  result.Add(new double[] {
                      csv.GetField<double>(0),
                      csv.GetField<double>(1),
                      csv.GetField<double>(2),
                  });
              }
              return result;
          }
      }
  }

以下是运行后结果,左边是原始数据折线图,右边是清洗后数据折线图

 源代码:https://gitee.com/Karl_Albright/csharp-demo/tree/master/PythonnetDemo/PythonnetClearData

 

 

 

抽稀算法

  def down_sampling(sig, factor=2, axis=0):
      '''
      Down-sampling: keep every ``factor``-th element of ``sig`` along ``axis``.

      Inputs:
          sig --- numpy array, signal data array
          factor --- int, down-sampling ratio (used as the slice step)
          axis --- int, axis along which to down-sample

      Returns:
          ``sig`` indexed with a full slice on every axis except ``axis``,
          which is sliced as ``::factor``.
      '''
      # Build the index from slice objects instead of assembling a string and
      # passing it to eval(); the resulting indexing operation is the same.
      index = [slice(None)] * sig.ndim
      index[axis] = slice(None, None, factor)
      return sig[tuple(index)]
  /// <summary>
  /// Down-sampling (decimation): reads "clean_data_row3.csv" and keeps every
  /// <paramref name="factor"/>-th element along the chosen axis, starting at index 0,
  /// like the Python slice <c>::factor</c>.
  /// </summary>
  /// <param name="factor">Down-sampling ratio; must be positive.</param>
  /// <param name="axis">0 to thin out rows, 1 to thin out the columns of every row.</param>
  /// <returns>
  /// The down-sampled rows. For axis 0 the kept rows are the same array instances that
  /// were read; for axis 1 every row is a newly allocated array.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// <paramref name="axis"/> is neither 0 nor 1, or <paramref name="factor"/> is not positive.
  /// </exception>
  static List<double[]> DownSampling(int factor = 2, int axis = 0)
  {
      if (axis != 0 && axis != 1)
      {
          throw new ArgumentException("Axis must be 0 or 1 for a 2D array.");
      }
      if (factor <= 0)
      {
          // ArgumentOutOfRangeException derives from ArgumentException, so callers
          // that catch ArgumentException keep working.
          throw new ArgumentOutOfRangeException(nameof(factor), "Factor must be a positive integer.");
      }

      var datas = ReadCsvWithCsvHelper("clean_data_row3.csv");
      var result = new List<double[]>();

      if (axis == 0)
      {
          // Stepping by factor keeps indices 0, factor, 2 * factor, ... including
          // the last one when the row count is not a multiple of factor.
          for (int i = 0; i < datas.Count; i += factor)
          {
              result.Add(datas[i]);
          }
          return result;
      }

      foreach (var deviceData in datas)
      {
          // ceil(length / factor) elements are kept, written without overflow.
          int kept = deviceData.Length == 0 ? 0 : (deviceData.Length - 1) / factor + 1;
          // A fresh array per row: sharing one buffer would make every output
          // row reference the same array and end up with the last row's values.
          var item = new double[kept];
          for (int j = 0; j < kept; j++)
          {
              item[j] = deviceData[j * factor];
          }
          result.Add(item);
      }
      return result;
  }

 源代码:https://gitee.com/Karl_Albright/csharp-demo/tree/master/PythonnetDemo/PythonnetClearData

原文链接:https://www.cnblogs.com/Cxiaoao/p/18261015

 友情链接:直通硅谷  点职佳  北美留学生论坛

本站QQ群:前端 618073944 | Java 606181507 | Python 626812652 | C/C++ 612253063 | 微信 634508462 | 苹果 692586424 | C#/.net 182808419 | PHP 305140648 | 运维 608723728

W3xue 的所有内容仅供测试,对任何法律问题及风险不承担任何责任。通过使用本站内容随之而来的风险与本站无关。
关于我们  |  意见建议  |  捐助我们  |  报错有奖  |  广告合作、友情链接(目前9元/月)请联系QQ:27243702
皖ICP备17017327号-2 皖公网安备34020702000426号