csv-handler
Handle CSV files from construction software exports. Auto-detect delimiters, encodings, and clean messy data.
Install via CLI (Recommended)
clawhub install openclaw/skills/skills/datadrivenconstruction/csv-handler
CSV Handler for Construction Data
Overview
CSV is the universal exchange format in construction - from scheduling exports to cost databases. This skill handles encoding issues, delimiter detection, and data cleaning.
Python Implementation
import pandas as pd
import csv
from typing import Dict, Any, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass
import chardet
@dataclass
class CSVProfile:
    """Detected format and dimensions of a single CSV file."""

    # Text encoding used to decode the file.
    encoding: str
    # Character separating the fields of a row.
    delimiter: str
    # True when the first line is treated as column names.
    has_header: bool
    # Number of data rows, with the header line (if any) excluded.
    row_count: int
    # Number of columns found in the sampled rows.
    column_count: int
    # Column labels in file order.
    columns: List[str]
class ConstructionCSVHandler:
    """Reader for CSV exports produced by construction software."""

    # Candidate field separators considered during delimiter detection.
    COMMON_DELIMITERS = [',', ';', '\t', '|']
    # Encodings frequently encountered in such exports.
    COMMON_ENCODINGS = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']

    def __init__(self):
        # Profile of the most recently profiled file; None until one exists.
        self.last_profile: Optional[CSVProfile] = None
def detect_encoding(self, file_path: str) -> str:
    """Guess the text encoding of a file from its leading bytes.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The encoding name reported by chardet, or 'utf-8' when chardet
        reports no encoding or reports plain ASCII.
    """
    with open(file_path, 'rb') as f:
        raw = f.read(10000)

    result = chardet.detect(raw)
    encoding = result.get('encoding') or 'utf-8'

    # Only the first 10000 bytes are sampled, so an "ascii" verdict says
    # nothing about the rest of the file. UTF-8 decodes every ASCII byte
    # identically, so promoting it keeps pure-ASCII files working while no
    # longer failing on UTF-8 text that first appears after the sample.
    if encoding.lower() == 'ascii':
        return 'utf-8'
    return encoding
def detect_delimiter(self, file_path: str, encoding: str) -> str:
    """Detect the field delimiter of a CSV file.

    The first 5000 characters are parsed once per candidate in
    ``COMMON_DELIMITERS``. A candidate is scored by how consistently it
    splits the sampled rows into the same number of fields, so characters
    that merely occur often (decimal commas, commas inside text) do not
    outvote the real separator. Ties are broken by the larger field count.

    Args:
        file_path: Path of the file to inspect.
        encoding: Encoding used to decode the file; undecodable bytes are
            replaced rather than raising.

    Returns:
        The best-scoring delimiter, or ',' when no candidate splits the
        sample into at least two fields.
    """
    sample_size = 5000
    with open(file_path, 'r', encoding=encoding, errors='replace') as f:
        sample = f.read(sample_size)

    lines = sample.splitlines(keepends=True)
    # A full-size sample most likely stops mid-line; that partial last line
    # would look like a row with too few fields, so leave it out.
    if len(sample) >= sample_size and len(lines) > 1:
        lines.pop()

    best_delimiter = ','
    best_score = (0.0, 0)
    for candidate in self.COMMON_DELIMITERS:
        try:
            # csv.reader honours quoting, so separators inside quoted
            # fields are not counted. Empty rows (blank lines) are ignored.
            widths = [len(row)
                      for row in csv.reader(lines, delimiter=candidate)
                      if row]
        except csv.Error:
            continue
        if not widths:
            continue

        # Most frequent field count; the larger count wins a frequency tie.
        typical = max(sorted(set(widths), reverse=True), key=widths.count)
        if typical < 2:
            # This candidate does not actually split the rows.
            continue

        score = (widths.count(typical) / len(widths), typical)
        if score > best_score:
            best_delimiter, best_score = candidate, score

    return best_delimiter
def profile_csv(self, file_path: str) -> CSVProfile:
    """Build a CSVProfile for a file and remember it as ``last_profile``.

    Encoding and delimiter are auto-detected, the first 10 rows are read
    with pandas to determine the columns, and the whole file is scanned
    once to count rows.

    Args:
        file_path: Path of the CSV file to profile.

    Returns:
        The profile, which is also stored on ``self.last_profile``.
    """
    encoding = self.detect_encoding(file_path)
    delimiter = self.detect_delimiter(file_path, encoding)

    # Read a small sample only; malformed lines are skipped, not fatal.
    sample_options = dict(encoding=encoding, delimiter=delimiter,
                          nrows=10, on_bad_lines='skip')
    df = pd.read_csv(file_path, **sample_options)

    # Heuristic: a first column label that is purely numeric (ignoring
    # '.' and '-') is taken to be data, i.e. the file has no header row.
    first_label = str(df.columns[0])
    has_header = not first_label.replace('.', '').replace('-', '').isdigit()

    if not has_header:
        # The first line was consumed as column names above. Re-read it as
        # data so the reported columns are positional labels, not values
        # taken from the first record.
        df = pd.read_csv(file_path, header=None, **sample_options)

    # Count physical lines, ignoring completely empty ones because pandas
    # skips blank lines by default.
    # NOTE(review): quoted fields containing line breaks are still counted
    # once per physical line.
    with open(file_path, 'r', encoding=encoding, errors='replace') as f:
        line_count = sum(1 for line in f if line.strip('\r\n'))
    row_count = max(line_count - (1 if has_header else 0), 0)

    profile = CSVProfile(
        encoding=encoding,
        delimiter=delimiter,
        has_header=has_header,
        row_count=row_count,
        column_count=len(df.columns),
        columns=[str(label) for label in df.columns],
    )
    self.last_profile = profile
    return profile
def read_csv(self, file_path: str,
encoding: Optional[str] = None,
delimiter: Optional[str] = None,
clean: bool = True) -> pd.DataFrame:
"""Read CSV with auto-detection."""
# Auto-detect if not provided
if encoding is None:
encoding = self.detect_encoding(file_path)
if...
Metadata
Not sure this is the right skill?
Describe what you want to build — we'll match you to the best skill from 16,000+ options.
Find the right skill
Paste this into your clawhub.json to enable this plugin.
{
"plugins": {
"official-datadrivenconstruction-csv-handler": {
"enabled": true,
"auto_update": true
}
}
}
Related Skills
data-lineage-tracker
Track data origin, transformations, and flow through construction systems. Essential for audit trails, compliance, and debugging data issues.
cwicr-cost-calculator
Calculate construction costs using DDC CWICR resource-based methodology. Break down costs into labor, materials, equipment with transparent pricing.
data-anomaly-detector
Detect anomalies and outliers in construction data: unusual costs, schedule variances, productivity spikes. Statistical and ML-based detection methods.
historical-cost-analyzer
Analyze historical construction costs for benchmarking, trend analysis, and estimating calibration. Compare projects, track escalation, identify patterns.
df-merger
Merge pandas DataFrames from multiple construction sources. Handle different schemas, keys, and data quality issues.