← 返回
数据分析 中文

Json Parser

Parse and validate JSON data from construction APIs, IoT sensors, and BIM exports. Transform nested JSON to flat DataFrames.
解析校验建筑API、IoT传感器、BIM导出的JSON;嵌套JSON转扁平DataFrame。
datadrivenconstruction
数据分析 clawhub v2.1.0 1 版本 99850.8 Key: 无需
★ 0
Stars
📥 2,008
下载
💾 36
安装
1
版本
#latest

概述

JSON Parser for Construction Data

Overview

Construction systems increasingly use JSON for data exchange - from IoT sensors to BIM metadata exports. This skill handles parsing, validation, and flattening of JSON structures.

Python Implementation

import json
import pandas as pd
from typing import Dict, Any, List, Optional, Union
from dataclasses import dataclass
from pathlib import Path


@dataclass
class JSONParseResult:
    """Result of JSON parsing operation."""
    success: bool
    data: Any
    errors: List[str]
    record_count: int


class ConstructionJSONParser:
    """Parse JSON data from construction sources."""

    def __init__(self):
        self.errors: List[str] = []

    def parse_file(self, file_path: str) -> JSONParseResult:
        """Parse JSON from file."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return JSONParseResult(True, data, [], self._count_records(data))
        except json.JSONDecodeError as e:
            return JSONParseResult(False, None, [f"JSON Error: {e}"], 0)
        except Exception as e:
            return JSONParseResult(False, None, [str(e)], 0)

    def parse_string(self, json_string: str) -> JSONParseResult:
        """Parse JSON from string."""
        try:
            data = json.loads(json_string)
            return JSONParseResult(True, data, [], self._count_records(data))
        except json.JSONDecodeError as e:
            return JSONParseResult(False, None, [f"JSON Error: {e}"], 0)

    def _count_records(self, data: Any) -> int:
        """Count records in data."""
        if isinstance(data, list):
            return len(data)
        elif isinstance(data, dict):
            return 1
        return 0

    def flatten_json(self, data: Dict, prefix: str = '') -> Dict[str, Any]:
        """Flatten nested JSON to single-level dict."""
        flat = {}
        for key, value in data.items():
            new_key = f"{prefix}_{key}" if prefix else key

            if isinstance(value, dict):
                flat.update(self.flatten_json(value, new_key))
            elif isinstance(value, list):
                if all(isinstance(i, (str, int, float, bool, type(None))) for i in value):
                    flat[new_key] = value
                else:
                    for i, item in enumerate(value):
                        if isinstance(item, dict):
                            flat.update(self.flatten_json(item, f"{new_key}_{i}"))
                        else:
                            flat[f"{new_key}_{i}"] = item
            else:
                flat[new_key] = value
        return flat

    def to_dataframe(self, data: Union[List[Dict], Dict]) -> pd.DataFrame:
        """Convert JSON data to DataFrame."""
        if isinstance(data, list):
            flat_records = [self.flatten_json(r) if isinstance(r, dict) else {'value': r} for r in data]
            return pd.DataFrame(flat_records)
        elif isinstance(data, dict):
            if all(isinstance(v, list) for v in data.values()):
                # Dict of lists - columnar format
                return pd.DataFrame(data)
            else:
                flat = self.flatten_json(data)
                return pd.DataFrame([flat])
        return pd.DataFrame()

    def extract_elements(self, data: Dict, path: str) -> List[Any]:
        """Extract elements using dot notation path."""
        parts = path.split('.')
        current = data

        for part in parts:
            if isinstance(current, dict) and part in current:
                current = current[part]
            elif isinstance(current, list) and part.isdigit():
                current = current[int(part)]
            else:
                return []

        return current if isinstance(current, list) else [current]

    def validate_schema(self, data: Dict,
                        required_fields: List[str]) -> Dict[str, Any]:
        """Validate JSON against required fields."""
        flat = self.flatten_json(data)
        missing = [f for f in required_fields if f not in flat]
        present = [f for f in required_fields if f in flat]

        return {
            'valid': len(missing) == 0,
            'missing_fields': missing,
            'present_fields': present,
            'completeness': len(present) / len(required_fields) * 100
        }


# BIM JSON Parser
class BIMJSONParser(ConstructionJSONParser):
    """Specialized parser for BIM JSON exports."""

    def parse_bim_elements(self, data: Dict) -> pd.DataFrame:
        """Parse BIM elements from JSON export."""
        elements = []

        # Common BIM JSON structures
        if 'elements' in data:
            elements = data['elements']
        elif 'objects' in data:
            elements = data['objects']
        elif 'entities' in data:
            elements = data['entities']
        elif isinstance(data, list):
            elements = data

        if not elements:
            return pd.DataFrame()

        # Flatten each element
        flat_elements = []
        for elem in elements:
            if isinstance(elem, dict):
                flat = self.flatten_json(elem)
                flat_elements.append(flat)

        return pd.DataFrame(flat_elements)

    def extract_properties(self, element: Dict) -> Dict[str, Any]:
        """Extract properties from BIM element."""
        props = {}

        # Common property locations in BIM JSON
        for key in ['properties', 'params', 'parameters', 'attributes']:
            if key in element and isinstance(element[key], dict):
                props.update(element[key])

        return props


# IoT JSON Parser
class IoTJSONParser(ConstructionJSONParser):
    """Parser for IoT sensor data."""

    def parse_sensor_reading(self, data: Dict) -> Dict[str, Any]:
        """Parse single sensor reading."""
        return {
            'sensor_id': data.get('sensor_id') or data.get('id'),
            'timestamp': data.get('timestamp') or data.get('time'),
            'value': data.get('value') or data.get('reading'),
            'unit': data.get('unit', ''),
            'location': data.get('location', '')
        }

    def parse_sensor_batch(self, data: List[Dict]) -> pd.DataFrame:
        """Parse batch of sensor readings."""
        readings = [self.parse_sensor_reading(r) for r in data]
        return pd.DataFrame(readings)

Quick Start

parser = ConstructionJSONParser()

# Parse from file
result = parser.parse_file("bim_export.json")
if result.success:
    df = parser.to_dataframe(result.data)
    print(f"Loaded {len(df)} records")

# Flatten nested JSON
flat = parser.flatten_json(result.data)

# Extract specific path
elements = parser.extract_elements(result.data, "project.building.floors")

Common Use Cases

1. BIM Metadata

bim_parser = BIMJSONParser()
result = bim_parser.parse_file("revit_export.json")
elements = bim_parser.parse_bim_elements(result.data)

2. IoT Sensors

iot_parser = IoTJSONParser()
readings = iot_parser.parse_sensor_batch(sensor_data)

3. API Response

parser = ConstructionJSONParser()
result = parser.parse_string(api_response)
df = parser.to_dataframe(result.data)

Resources

  • DDC Book: Chapter 2.1 - Semi-structured Data

版本历史

共 1 个版本

  • v2.1.0 当前
    2026-03-28 20:28 安全 安全

安全检测

腾讯云安全 (Keen)

安全,无风险
查看报告

腾讯云安全 (Sanbu)

安全,无风险
查看报告

🔗 相关推荐

data-analysis

Excel / XLSX

ivangdavila
创建、检查和编辑 Microsoft Excel 工作簿及 XLSX 文件,支持可靠的公式、日期、类型、格式、重算及模板保留功能。
★ 368 📥 140,509
data-analysis

A股量化 AkShare

mbpz
A股量化数据分析工具,基于AkShare库获取A股行情、财务数据、板块信息等。用于回答关于A股股票查询、行情数据、财务分析、选股等问题。
★ 165 📥 60,042
developer-tools

Pdf To Structured

datadrivenconstruction
{"answer":"从建筑PDF提取结构化数据。将规格、BOM、进度表及报告转为Excel/CSV/JSON。扫描件用OCR,原生PDF用pdfplumber。"}
★ 9 📥 4,827