import Papa from "papaparse";
import {
type CsvDelimiter,
type LocalCsvHeaderScanOptions,
type LocalCsvHeaderScanResult,
SUPPORTED_CSV_DELIMITERS,
} from "./types";
// Read at most 1 MiB of the file for the local header preview.
const DEFAULT_SCAN_BYTES = 1024 * 1024;
// Delimiters we recognize but do not support; their consistent presence is
// used to distinguish "unsupported delimiter" from "single-column CSV".
const UNSUPPORTED_DELIMITER_HINTS = ["\t", "|"];
// Drop a leading U+FEFF byte-order mark, which some CSV exporters prepend.
function stripUtf8Bom(value: string): string {
  if (value.startsWith("\uFEFF")) {
    return value.slice(1);
  }
  return value;
}
// Return the first line with non-whitespace content, or null if none exists.
// Splits on CRLF, LF, or CR so all common newline conventions are handled.
function getFirstNonEmptyLine(text: string): string | null {
  for (const line of text.split(/\r\n|\n|\r/)) {
    if (line.trim().length > 0) {
      return line;
    }
  }
  return null;
}
// Collect up to `count` lines that contain non-whitespace content,
// preserving their original (untrimmed) text.
function getFirstNonEmptyLines(text: string, count: number): string[] {
  const hasContent = (line: string): boolean => line.trim().length > 0;
  return text.split(/\r\n|\n|\r/).filter(hasContent).slice(0, count);
}
// Parse the first row of `text` with the given delimiter and return its cells.
// `preview: 1` stops Papa after a single row; an unparseable input yields [].
function parseHeaderRow(text: string, delimiter: CsvDelimiter): string[] {
  const { data } = Papa.parse<string[]>(text, {
    delimiter,
    preview: 1,
    skipEmptyLines: true,
  });
  const [headerRow] = data;
  return headerRow ?? [];
}
// Parse the first data row keyed by header names, or null when the input has
// a header but no data rows. `header: true` makes Papa key each row by the
// header line, so data[0] is the first row *after* the header.
function parseFirstDataRow(
  text: string,
  delimiter: CsvDelimiter,
): Record<string, string> | null {
  const { data } = Papa.parse<Record<string, string>>(text, {
    header: true,
    delimiter,
    preview: 1,
    skipEmptyLines: true,
  });
  return data[0] ?? null;
}
// True when the cell holds a finite number after trimming.
// Blank cells are rejected up front because Number("") coerces to 0;
// Number.isFinite additionally filters NaN and +/-Infinity.
function isNumericCell(value: string | undefined): boolean {
  const trimmed = (value ?? "").trim();
  if (trimmed.length === 0) {
    return false;
  }
  return Number.isFinite(Number(trimmed));
}
function getNonNumericColumnWarnings(
text: string,
delimiter: CsvDelimiter,
headers: string[],
): string[] {
const firstDataRow = parseFirstDataRow(text, delimiter);
if (!firstDataRow) {
return [];
}
const nonNumericHeaders = headers.filter(
(header) => !isNumericCell(firstDataRow[header]),
);
if (nonNumericHeaders.length === 0) {
return [];
}
return [
`The first data row is non-numeric for: ${nonNumericHeaders.join(", ")}. Scenario import will skip those columns.`,
];
}
/**
 * Detect the supported delimiter that yields the most header columns on the
 * first non-empty line.
 *
 * A valid single-column CSV contains no delimiter characters at all, so after
 * ruling out obvious unsupported delimiters (tab/pipe) we fall back to comma
 * rather than rejecting the file.
 */
export function detectSupportedDelimiter(text: string): CsvDelimiter | null {
  const headerLine = getFirstNonEmptyLine(text);
  if (headerLine === null) {
    return null;
  }
  let winner: CsvDelimiter | null = null;
  let winnerColumns = 0;
  for (const candidate of SUPPORTED_CSV_DELIMITERS) {
    const columnCount = parseHeaderRow(headerLine, candidate).length;
    if (columnCount > winnerColumns) {
      winner = candidate;
      winnerColumns = columnCount;
    }
  }
  // More than one column means a supported delimiter clearly matched.
  if (winnerColumns > 1) {
    return winner;
  }
  // A tab or pipe appearing on every sampled line strongly suggests an
  // unsupported delimiter rather than a single-column file.
  const sampleLines = getFirstNonEmptyLines(text, 2);
  const looksUnsupported =
    sampleLines.length > 0 &&
    UNSUPPORTED_DELIMITER_HINTS.some((hint) =>
      sampleLines.every((line) => line.includes(hint)),
    );
  if (looksUnsupported) {
    return null;
  }
  // No delimiter signal usually means a single-column CSV, which is valid.
  return ",";
}
// Read and decode the first `bytesToRead` bytes of `file` as UTF-8.
// Returns the decoded text plus any human-readable warnings produced
// while reading (empty file, truncated multi-byte character).
async function readCsvSlice(
  file: File,
  bytesToRead: number,
): Promise<{ text: string; warnings: string[] }> {
  // Short-circuit empty files with a dedicated warning instead of decoding.
  if (file.size === 0) {
    return { text: "", warnings: ["The selected CSV file is empty."] };
  }
  const buffer = await file.slice(0, bytesToRead).arrayBuffer();
  try {
    // fatal: true makes TextDecoder throw on malformed UTF-8 instead of
    // silently substituting replacement characters.
    const strictText = new TextDecoder("utf-8", { fatal: true }).decode(buffer);
    return { text: stripUtf8Bom(strictText), warnings: [] };
  } catch (error) {
    // TextDecoder reports malformed input as a TypeError; anything else is
    // unexpected and must propagate.
    if (!(error instanceof TypeError)) {
      throw error;
    }
    // The whole file was read, so the content is genuinely invalid UTF-8.
    if (bytesToRead >= file.size) {
      throw error;
    }
    // The slice likely cut a multi-byte character in half; decode leniently
    // and let the caller decide whether the preview still looks right.
    const lenientText = new TextDecoder("utf-8", { fatal: false }).decode(
      buffer,
    );
    return {
      text: stripUtf8Bom(lenientText),
      warnings: [
        "The CSV preview ended in the middle of a UTF-8 character. Continue if the detected headers look correct.",
      ],
    };
  }
}
/**
 * Read only an initial slice of the CSV so the UI can show a fast local
 * preview without loading the entire file into memory.
 *
 * @param file - The CSV file selected by the user.
 * @param options - Scan size and whether to warn about non-numeric columns.
 * @returns Trimmed header names, the guessed delimiter (null when detection
 *   fails), and deduplicated human-readable warnings.
 */
export async function scanCsvHeadersLocal(
  file: File,
  options: LocalCsvHeaderScanOptions = {},
): Promise<LocalCsvHeaderScanResult> {
  const bytesToRead = options.bytesToRead ?? DEFAULT_SCAN_BYTES;
  const warnOnNonNumericColumns = options.warnOnNonNumericColumns ?? false;
  const slice = await readCsvSlice(file, bytesToRead);
  // Collect warnings in a Set so duplicates from multiple checks collapse.
  const warnings = new Set(slice.warnings);
  // Whitespace-only content: nothing to detect, surface only read warnings.
  if (slice.text.trim().length === 0) {
    return {
      headers: [],
      guessedDelimiter: null,
      warnings: Array.from(warnings),
    };
  }
  const delimiter = detectSupportedDelimiter(slice.text);
  if (delimiter === null) {
    warnings.add(
      "Only comma and semicolon delimited CSV files are supported.",
    );
    return {
      headers: [],
      guessedDelimiter: null,
      warnings: Array.from(warnings),
    };
  }
  const headers = parseHeaderRow(slice.text, delimiter).map((header) =>
    header.trim(),
  );
  // The two warning branches are mutually exclusive: either the header row
  // is missing entirely, or it exists and may be sampled for numeric cells.
  if (headers.length === 0) {
    warnings.add("No CSV header row was detected.");
  } else if (warnOnNonNumericColumns) {
    for (const warning of getNonNumericColumnWarnings(
      slice.text,
      delimiter,
      headers,
    )) {
      warnings.add(warning);
    }
  }
  return {
    headers,
    guessedDelimiter: delimiter,
    warnings: Array.from(warnings),
  };
}