diff --git a/.env.example b/.env.example index c214bbc..f02aaac 100644 --- a/.env.example +++ b/.env.example @@ -64,3 +64,8 @@ AWS_BUCKET= AWS_USE_PATH_STYLE_ENDPOINT=false VITE_APP_NAME="${APP_NAME}" + +# Optional: voller Pfad zu tesseract (Windows/WAMP: oft noetig, wenn PHP kein PATH hat) +# RECEIPT_OCR_BIN="C:/Program Files/Tesseract-OCR/tesseract.exe" +# true: beim OCR raw_meta.store_guess_debug (Zeilen vs. Spar) speichern +# RECEIPT_OCR_DEBUG_STORE=false diff --git a/app/Http/Controllers/ReceiptScanController.php b/app/Http/Controllers/ReceiptScanController.php index bbf3d0d..4677374 100644 --- a/app/Http/Controllers/ReceiptScanController.php +++ b/app/Http/Controllers/ReceiptScanController.php @@ -5,10 +5,16 @@ namespace App\Http\Controllers; use App\Http\Controllers\Concerns\ResolvesCurrentShoppingList; use App\Http\Requests\StoreReceiptScanRequest; use App\Http\Requests\UpdateReceiptScanRequest; +use App\Models\ItemPriceLog; use App\Models\ReceiptScan; +use App\Models\ShoppingItem; +use App\Models\Store; use App\Services\ReceiptOcr\ReceiptOcrService; use Illuminate\Contracts\View\View; use Illuminate\Http\RedirectResponse; +use Illuminate\Http\Request; +use Illuminate\Support\Carbon; +use Illuminate\Support\Facades\DB; use Illuminate\Support\Facades\Storage; class ReceiptScanController extends Controller @@ -26,10 +32,22 @@ class ReceiptScanController extends Controller ->where('shopping_list_id', $currentList->id) ->latest() ->paginate(12); + $listProductLookup = $this->buildShoppingListProductLookup((int) $currentList->id); + + $scans->getCollection()->transform(function (ReceiptScan $scan) { + $meta = is_array($scan->raw_meta) ? $scan->raw_meta : []; + $scan->setAttribute( + 'item_suggestions', + $this->normalizeItemSuggestions($meta['item_suggestions'] ?? 
$this->extractItemSuggestions($scan->ocr_text)) + ); + + return $scan; + }); return view('receipt-scans.index', [ 'currentList' => $currentList, 'scans' => $scans, + 'listProductLookup' => $listProductLookup, 'uploadLimits' => [ 'upload_max_filesize' => (string) ini_get('upload_max_filesize'), 'post_max_size' => (string) ini_get('post_max_size'), @@ -48,6 +66,9 @@ class ReceiptScanController extends Controller $path = $request->file('receipt_photo')->store('receipt-scans', 'public'); $absolute = Storage::disk('public')->path($path); $ocr = $ocrService->extractFromImage($absolute); + $itemSuggestions = $this->extractItemSuggestions($ocr['text']); + $rawMeta = is_array($ocr['meta']) ? $ocr['meta'] : []; + $rawMeta['item_suggestions'] = $itemSuggestions; ReceiptScan::query()->create([ 'shopping_list_id' => $currentList->id, @@ -57,12 +78,70 @@ class ReceiptScanController extends Controller 'store_name' => $request->filled('store_name') ? $request->string('store_name')->toString() : $ocr['store_name'], 'receipt_date' => $request->filled('receipt_date') ? $request->date('receipt_date')?->toDateString() : $ocr['receipt_date'], 'total_decimal' => $request->filled('total_decimal') ? $request->input('total_decimal') : $ocr['total_decimal'], - 'raw_meta' => $ocr['meta'], + 'raw_meta' => $rawMeta, + ]); + + $message = 'Kassazettel gespeichert.'; + if ($ocr['ok']) { + $message = 'Kassazettel gespeichert und OCR ausgewertet.'; + } else { + $hint = is_array($ocr['meta'] ?? null) ? ($ocr['meta']['hint'] ?? 
null) : null; + if (is_string($hint) && $hint !== '') { + $message .= ' '.$hint; + } else { + $message .= ' OCR konnte den Inhalt nicht lesen (siehe Details beim Kassabon).'; + } + } + + return back()->with('status', $message); + } + + public function reprocessOcr(Request $request, ReceiptScan $receiptScan, ReceiptOcrService $ocrService): RedirectResponse + { + $currentList = $this->currentShoppingList($request); + $this->authorize('view', $currentList); + abort_if((int) $receiptScan->shopping_list_id !== (int) $currentList->id, 403); + + $path = $receiptScan->image_path; + $absolute = Storage::disk('public')->path($path); + $ocr = $ocrService->extractFromImage($absolute); + $itemSuggestions = $this->extractItemSuggestions($ocr['text']); + $rawMeta = is_array($ocr['meta']) ? $ocr['meta'] : []; + $rawMeta['item_suggestions'] = $itemSuggestions; + $previousMeta = is_array($receiptScan->raw_meta) ? $receiptScan->raw_meta : []; + if (isset($previousMeta['validated_items'])) { + $rawMeta['validated_items'] = $previousMeta['validated_items']; + } + if (isset($previousMeta['validated_at'])) { + $rawMeta['validated_at'] = $previousMeta['validated_at']; + } + + $nextStoreName = $receiptScan->store_name; + if (trim((string) $nextStoreName) === '' && is_string($ocr['store_name'] ?? null) && trim($ocr['store_name']) !== '') { + $nextStoreName = trim($ocr['store_name']); + } + + $nextReceiptDate = $receiptScan->receipt_date; + if (empty($nextReceiptDate) && is_string($ocr['receipt_date'] ?? null) && trim($ocr['receipt_date']) !== '') { + $nextReceiptDate = trim($ocr['receipt_date']); + } + + $nextTotalDecimal = $receiptScan->total_decimal; + if (empty($nextTotalDecimal) && is_string($ocr['total_decimal'] ?? 
null) && trim($ocr['total_decimal']) !== '') { + $nextTotalDecimal = trim($ocr['total_decimal']); + } + + $receiptScan->update([ + 'ocr_text' => $ocr['text'], + 'store_name' => $nextStoreName, + 'receipt_date' => $nextReceiptDate, + 'total_decimal' => $nextTotalDecimal, + 'raw_meta' => $rawMeta, ]); $message = $ocr['ok'] - ? 'Kassazettel gespeichert und OCR ausgewertet.' - : 'Kassazettel gespeichert. OCR war nicht verfuegbar (Bild kann spaeter manuell erfasst werden).'; + ? 'OCR erneut ausgefuehrt.' + : 'OCR erneut versucht – kein Text erkannt.'.(is_string($rawMeta['hint'] ?? null) ? ' '.$rawMeta['hint'] : ''); return back()->with('status', $message); } @@ -81,4 +160,454 @@ class ReceiptScanController extends Controller return back()->with('status', 'Kassazettel-Daten aktualisiert.'); } + + public function applyItems(Request $request, ReceiptScan $receiptScan): RedirectResponse + { + $currentList = $this->currentShoppingList($request); + $this->authorize('view', $currentList); + abort_if((int) $receiptScan->shopping_list_id !== (int) $currentList->id, 403); + + $validated = $request->validate([ + 'row_labels' => ['nullable', 'array'], + 'row_labels.*' => ['nullable', 'string', 'max:255'], + 'row_prices' => ['nullable', 'array'], + 'row_prices.*' => ['nullable', 'string', 'max:64'], + 'row_qty' => ['nullable', 'array'], + 'row_qty.*' => ['nullable', 'string', 'max:64'], + 'row_take' => ['nullable', 'array'], + 'row_take.*' => ['nullable', 'in:1'], + ]); + + $labels = $validated['row_labels'] ?? []; + $prices = $validated['row_prices'] ?? []; + $qtys = $validated['row_qty'] ?? []; + $take = $validated['row_take'] ?? []; + + $rows = collect($labels) + ->map(function ($label, $i) use ($prices, $qtys, $take) { + if (! isset($take[$i])) { + return null; + } + $label = trim((string) $label); + if ($label === '') { + return null; + } + + return [ + 'label' => $label, + 'price_raw' => trim((string) ($prices[$i] ?? '')), + 'quantity_raw' => trim((string) ($qtys[$i] ?? 
'')), + ]; + }) + ->filter() + ->unique(fn (array $r) => mb_strtolower($r['label'])) + ->values(); + + if ($rows->isEmpty()) { + return back()->with('status', 'Keine Position mit Haken ausgewaehlt oder alle Artikelnamen leer.'); + } + + $storeId = $this->resolveStoreIdFromReceiptName($receiptScan->store_name, $request->user()->id); + $loggedAt = $receiptScan->receipt_date !== null + ? Carbon::parse($receiptScan->receipt_date)->endOfDay() + : Carbon::now(); + + $created = 0; + $markedDone = 0; + $pricesLogged = 0; + + DB::transaction(function () use ($rows, $currentList, $request, $storeId, $loggedAt, &$created, &$markedDone, &$pricesLogged): void { + foreach ($rows as $row) { + $productName = $row['label']; + $priceDecimal = $this->parsePriceDecimalFromRaw($row['price_raw']); + + $existing = ShoppingItem::query() + ->where('shopping_list_id', $currentList->id) + ->whereRaw('LOWER(product_name) = ?', [mb_strtolower($productName)]) + ->latest('id') + ->first(); + + if ($existing !== null) { + if (! $existing->is_done) { + $update = [ + 'is_done' => true, + 'done_at' => Carbon::now(), + 'store_id' => $storeId ?? $existing->store_id, + ]; + if (($row['quantity_raw'] ?? '') !== '') { + $update['quantity'] = $row['quantity_raw']; + } + $existing->update($update); + $markedDone++; + if ($priceDecimal !== null) { + ItemPriceLog::query()->create([ + 'shopping_item_id' => $existing->id, + 'store_id' => $storeId, + 'price_decimal' => $priceDecimal, + 'currency' => 'EUR', + 'logged_at' => $loggedAt, + 'photo_path' => null, + 'source' => 'receipt_ocr', + ]); + $pricesLogged++; + } + } + + continue; + } + + $item = ShoppingItem::query()->create([ + 'shopping_list_id' => $currentList->id, + 'created_by' => $request->user()->id, + 'product_name' => $productName, + 'quantity' => ($row['quantity_raw'] ?? '') !== '' ? 
$row['quantity_raw'] : null, + 'store_id' => $storeId, + 'is_done' => true, + 'done_at' => Carbon::now(), + ]); + $created++; + + if ($priceDecimal !== null) { + ItemPriceLog::query()->create([ + 'shopping_item_id' => $item->id, + 'store_id' => $storeId, + 'price_decimal' => $priceDecimal, + 'currency' => 'EUR', + 'logged_at' => $loggedAt, + 'photo_path' => null, + 'source' => 'receipt_ocr', + ]); + $pricesLogged++; + } + } + }); + + $meta = is_array($receiptScan->raw_meta) ? $receiptScan->raw_meta : []; + $meta['validated_items'] = $rows->all(); + $meta['validated_at'] = Carbon::now()->toISOString(); + $receiptScan->update(['raw_meta' => $meta]); + + return back()->with( + 'status', + "Uebernommen: {$created} neu, {$markedDone} offen->erledigt, {$pricesLogged} mit Preis." + ); + } + + /** + * @param list|mixed $raw + * @return list + */ + /** + * Gleicher Name wie in applyItems: LOWER(product_name) -> offen schlaegt erledigt. + * + * @return array + */ + private function buildShoppingListProductLookup(int $shoppingListId): array + { + $items = ShoppingItem::query() + ->where('shopping_list_id', $shoppingListId) + ->get(['product_name', 'is_done']); + + $out = []; + foreach ($items as $item) { + $key = mb_strtolower(trim((string) $item->product_name)); + if ($key === '') { + continue; + } + if (! $item->is_done) { + $out[$key] = 'open'; + } elseif (! isset($out[$key])) { + $out[$key] = 'done'; + } + } + + return $out; + } + + private function normalizeItemSuggestions(mixed $raw): array + { + if (! is_array($raw)) { + return []; + } + + $out = []; + foreach ($raw as $row) { + if (is_string($row)) { + $out[] = ['label' => $row, 'price_raw' => '', 'quantity_raw' => '', 'is_uncertain' => false]; + + continue; + } + if (is_array($row) && isset($row['label'])) { + $pr = isset($row['price_raw']) ? 
(string) $row['price_raw'] : ''; + $out[] = [ + 'label' => (string) $row['label'], + 'price_raw' => $this->stripVatLetterFromPriceField($pr), + 'quantity_raw' => isset($row['quantity_raw']) ? trim((string) $row['quantity_raw']) : '', + 'is_uncertain' => (bool) ($row['is_uncertain'] ?? false), + ]; + } + } + + return $out; + } + + private function resolveStoreIdFromReceiptName(?string $storeName, int $userId): ?int + { + $storeName = trim((string) $storeName); + if ($storeName === '') { + return null; + } + + $normalized = mb_strtolower($storeName); + $store = Store::query()->firstOrCreate( + ['normalized_name' => $normalized], + [ + 'name' => $storeName, + 'search_url_template' => Store::defaultSearchTemplateForName($normalized), + 'created_by' => $userId, + ] + ); + + return $store->id; + } + + private function parsePriceDecimalFromRaw(string $raw): ?string + { + $raw = $this->stripVatLetterFromPriceField($raw); + if ($raw === '') { + return null; + } + + if (preg_match('/(\d+)[.,](\d{2})(?!\d)/', $raw, $m)) { + return number_format((float) ($m[1].'.'.$m[2]), 2, '.', ''); + } + + return null; + } + + /** + * Buchstaben A–E nach dem Betrag sind auf oesterreichischen Bons typisch nur MwSt-Kennzeichen, kein Preisbestandteil. + */ + private function stripVatLetterFromPriceField(string $raw): string + { + $raw = trim($raw); + if ($raw === '') { + return ''; + } + + return trim(preg_replace('/\s+[A-E]\s*$/u', '', $raw) ?? $raw); + } + + /** + * @return list + */ + private function extractItemSuggestions(?string $ocrText): array + { + $text = (string) $ocrText; + if (trim($text) === '') { + return []; + } + + $lines = preg_split('/\R+/', $text) ?: []; + $startIndex = 0; + foreach ($lines as $idx => $line) { + $candidate = trim((string) $line); + if ($candidate === '') { + continue; + } + if (preg_match('/\b\d{2}[.\/-]\d{2}[.\/-]\d{2,4}\b/u', $candidate) === 1) { + // Artikel starten typischerweise unter dem Datum. 
+ $startIndex = $idx + 1; + break; + } + } + + $items = []; + $uncertain = []; + $pending = null; + + for ($i = $startIndex; $i < count($lines); $i++) { + $rawLine = $lines[$i]; + $trim = trim($rawLine); + if ($trim === '' || mb_strlen($trim) > 120) { + continue; + } + + if ($pending !== null) { + $qtyParsed = $this->parseQuantityTimesUnitLine($rawLine, $trim, false); + if ($qtyParsed === null) { + $qtyParsed = $this->parseQuantityTimesUnitLine($rawLine, $trim, true); + } + if ($qtyParsed !== null) { + $items[] = [ + 'label' => $pending, + 'price_raw' => $this->stripVatLetterFromPriceField($qtyParsed['unit_raw']), + 'quantity_raw' => $qtyParsed['quantity'].' Stück', + 'is_uncertain' => false, + ]; + $pending = null; + + continue; + } + $uncertain[] = [ + 'label' => $pending, + 'price_raw' => '', + 'quantity_raw' => '', + 'is_uncertain' => true, + ]; + $pending = null; + $i--; + + continue; + } + + if (str_starts_with($trim, '=') || str_starts_with($trim, '~')) { + continue; + } + + if (! 
preg_match('/\p{L}/u', $trim)) { + continue; + } + + if ($this->isReceiptLineBlacklisted($trim)) { + continue; + } + + $standard = $this->parseStandardProductPriceLine($trim); + if ($standard !== null) { + $items[] = array_merge($standard, ['quantity_raw' => '', 'is_uncertain' => false]); + + continue; + } + + if ($this->isLikelyProductNameOnlyLine($trim)) { + $name = $this->cleanArticleNameString($trim); + if ($name !== null && mb_strlen($name) >= 2) { + $pending = $name; + } + } else { + $name = $this->cleanArticleNameString($trim); + if ($name !== null && mb_strlen($name) >= 2) { + $uncertain[] = [ + 'label' => $name, + 'price_raw' => '', + 'quantity_raw' => '', + 'is_uncertain' => true, + ]; + } + } + } + + if ($pending !== null) { + $uncertain[] = [ + 'label' => $pending, + 'price_raw' => '', + 'quantity_raw' => '', + 'is_uncertain' => true, + ]; + } + + return collect($items) + ->merge($uncertain) + ->unique(fn (array $r) => mb_strtolower($r['label']).'|'.($r['is_uncertain'] ? 'u' : 'c')) + ->take(40) + ->values() + ->all(); + } + + private function isReceiptLineBlacklisted(string $trim): bool + { + $blacklist = [ + 'summe', 'gesamt', 'zu zahlen', 'betrag', 'mwst', 'ust', 'steuer', 'rabatt', + 'karte', 'bar', 'zahlung', 'kasse', 'beleg', 'datum', 'uhr', 'eur', 'euro', + 'filiale', 'bon', 'storno', 'wechselgeld', 'pfand', 'mengenvorteil', 'aktionsersparnis', + ]; + $normalizedLower = mb_strtolower($trim); + + return collect($blacklist)->contains(fn ($word) => str_contains($normalizedLower, $word)); + } + + /** + * Eingerueckte Zeile: "2 x 1,49" optional Gesamt "2,98" (MwSt-Buchstabe am Ende wie ueblich ignorieren). + * + * @return array{quantity: int, unit_raw: string}|null + */ + private function parseQuantityTimesUnitLine(string $rawLine, string $trimmed, bool $allowWithoutIndent): ?array + { + if (! $allowWithoutIndent && ! preg_match('/^\s{2,}/', $rawLine)) { + return null; + } + + $t = preg_replace('/\s+([A-E])\s*$/u', '', $trimmed) ?? $trimmed; + + if (! 
preg_match('/^(\d+)\s*[xX×]\s*(\d+[.,]\d{2})(?:\s+(\d+[.,]\d{2}))?\s*$/u', $t, $m)) { + return null; + } + + return [ + 'quantity' => (int) $m[1], + 'unit_raw' => $m[2], + ]; + } + + private function isLikelyProductNameOnlyLine(string $trim): bool + { + if (preg_match('/\d+[.,]\d{2}\s*$/u', $trim)) { + return false; + } + if (preg_match('/^(\d+)\s*[xX×]\s*(\d+[.,]\d{2})/u', $trim)) { + return false; + } + + return true; + } + + /** + * @return array{label: string, price_raw: string}|null + */ + private function parseStandardProductPriceLine(string $line): ?array + { + if (! preg_match('/\d+[.,]\d{2}/', $line)) { + return null; + } + + if (! preg_match('/^(.*?)\s+(\d+[.,]\d{2})\s*([A-E])?\s*$/u', $line, $m)) { + return null; + } + + $article = trim($m[1]); + $article = preg_replace('/^[|©=]\s*/u', '', $article) ?? $article; + $article = trim($article); + if (preg_match('/^[A-Z]\s+\p{L}/u', $article)) { + $article = preg_replace('/^[A-Z]\s+/u', '', $article, 1); + $article = trim($article); + } + $article = preg_replace('/^\d+\s*[x*]\s*/iu', '', $article) ?? $article; + $article = preg_replace('/\s{2,}/', ' ', $article) ?? $article; + $article = trim($article, " \t\n\r\0\x0B-.:"); + + if ($article === '' || mb_strlen($article) < 2) { + return null; + } + + return [ + 'label' => $article, + 'price_raw' => trim($m[2]), + ]; + } + + private function cleanArticleNameString(string $line): ?string + { + $article = trim($line); + $article = preg_replace('/^[|©=]\s*/u', '', $article) ?? $article; + $article = trim($article); + if (preg_match('/^[A-Z]\s+\p{L}/u', $article)) { + $article = preg_replace('/^[A-Z]\s+/u', '', $article, 1); + $article = trim($article); + } + $article = preg_replace('/\s{2,}/', ' ', $article) ?? $article; + $article = trim($article, " \t\n\r\0\x0B-.:"); + + return $article !== '' ? 
$article : null; + } } diff --git a/app/Http/Controllers/StoreSearchController.php b/app/Http/Controllers/StoreSearchController.php new file mode 100644 index 0000000..c069b3d --- /dev/null +++ b/app/Http/Controllers/StoreSearchController.php @@ -0,0 +1,37 @@ +validate([ + 'store' => ['required', 'string'], + 'q' => ['required', 'string', 'max:255'], + ]); + + $store = mb_strtolower($request->string('store')->toString()); + $query = $request->string('q')->toString(); + + if ($store !== 'spar') { + return response()->json([ + 'results' => [], + 'message' => 'Aktuell ist nur Spar angebunden.', + ]); + } + + $search = $sparSearchService->search($query, 5); + + return response()->json([ + 'results' => $search['results'], + 'from_cache' => $search['from_cache'], + 'fetched_at' => $search['fetched_at'], + 'source_url' => 'https://www.spar.at/suche?q='.urlencode($query), + ]); + } +} diff --git a/app/Services/ReceiptOcr/ReceiptOcrService.php b/app/Services/ReceiptOcr/ReceiptOcrService.php index 673060a..9947f5e 100644 --- a/app/Services/ReceiptOcr/ReceiptOcrService.php +++ b/app/Services/ReceiptOcr/ReceiptOcrService.php @@ -2,19 +2,14 @@ namespace App\Services\ReceiptOcr; +use App\Models\Store; use Illuminate\Support\Carbon; class ReceiptOcrService { public function isAvailable(): bool { - $bin = (string) config('app.receipt_ocr_bin', env('RECEIPT_OCR_BIN', 'tesseract')); - $command = sprintf('%s --version 2>&1', escapeshellcmd($bin)); - $output = []; - $exitCode = 0; - @exec($command, $output, $exitCode); - - return $exitCode === 0; + return $this->resolveBinary() !== null; } /** @@ -22,8 +17,8 @@ class ReceiptOcrService */ public function extractFromImage(string $absolutePath): array { - $bin = (string) config('app.receipt_ocr_bin', env('RECEIPT_OCR_BIN', 'tesseract')); - if (! 
$this->isAvailable()) { + $bin = $this->resolveBinary(); + if ($bin === null) { return [ 'ok' => false, 'text' => null, @@ -32,22 +27,13 @@ class ReceiptOcrService 'total_decimal' => null, 'meta' => [ 'error' => 'ocr_unavailable', - 'hint' => 'Installiere tesseract und setze optional RECEIPT_OCR_BIN.', + 'hint' => 'Installiere Tesseract und setze optional RECEIPT_OCR_BIN (voller Pfad unter Windows).', ], ]; } - $command = sprintf( - '%s %s stdout -l deu+eng 2>&1', - escapeshellcmd($bin), - escapeshellarg($absolutePath) - ); - - $output = []; - $exitCode = 0; - @exec($command, $output, $exitCode); - - if ($exitCode !== 0) { + $prepared = $this->prepareInputFile($absolutePath); + if (($prepared['error'] ?? null) !== null) { return [ 'ok' => false, 'text' => null, @@ -55,36 +41,368 @@ class ReceiptOcrService 'receipt_date' => null, 'total_decimal' => null, 'meta' => [ - 'error' => 'ocr_failed', - 'exit_code' => $exitCode, - 'command' => $command, - 'output' => implode("\n", $output), + 'error' => $prepared['error'], + 'hint' => $prepared['hint'] ?? null, ], ]; } - $text = trim(implode("\n", $output)); - $store = $this->guessStoreName($text); - $receiptDate = $this->guessDate($text); - $total = $this->guessTotal($text); + $workPath = $prepared['path']; + $cleanup = $prepared['cleanup']; + $preprocessMeta = null; - return [ - 'ok' => true, - 'text' => $text !== '' ? $text : null, - 'store_name' => $store, - 'receipt_date' => $receiptDate, - 'total_decimal' => $total, - 'meta' => [ + $preprocessed = $this->preprocessImageForOcr($workPath); + if ($preprocessed !== null) { + $workPath = $preprocessed['path']; + $cleanup[] = $preprocessed['path']; + $preprocessMeta = $preprocessed['meta']; + } + + $outBase = rtrim(sys_get_temp_dir(), DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR.'ocr_'.uniqid('', true); + + try { + // Dateiausgabe statt stdout: unter Windows zuverlaessiger als Ausgabe-Capture. 
+ $command = sprintf( + '%s %s %s -l deu+eng --psm 6 2>&1', + $this->escapeExecutable($bin), + escapeshellarg($workPath), + escapeshellarg($outBase) + ); + + $output = []; + $exitCode = 0; + @exec($command, $output, $exitCode); + + $txtPath = $outBase.'.txt'; + $fileText = is_file($txtPath) ? (string) file_get_contents($txtPath) : ''; + $text = trim($fileText); + if ($text === '' && $exitCode === 0) { + $text = trim(implode("\n", $output)); + } + + if ($exitCode !== 0) { + return [ + 'ok' => false, + 'text' => null, + 'store_name' => null, + 'receipt_date' => null, + 'total_decimal' => null, + 'meta' => [ + 'error' => 'ocr_failed', + 'exit_code' => $exitCode, + 'command' => $command, + 'output' => implode("\n", $output), + ], + ]; + } + + if ($text === '') { + return [ + 'ok' => false, + 'text' => null, + 'store_name' => null, + 'receipt_date' => null, + 'total_decimal' => null, + 'meta' => [ + 'error' => 'ocr_empty', + 'hint' => 'Kein Text erkannt. Bitte schaerferes Foto (JPG/PNG) oder anderen Ausschnitt; PDF ggf. 
als Bild exportieren.', + 'command' => $command, + 'stderr_tail' => implode("\n", array_slice($output, -15)), + ], + ]; + } + + $store = $this->guessStoreName($text); + $receiptDate = $this->guessDate($text); + $total = $this->guessTotal($text); + + $meta = [ 'engine' => 'tesseract', 'command' => $command, 'exit_code' => $exitCode, - ], + ]; + if (is_array($preprocessMeta)) { + $meta['preprocess'] = $preprocessMeta; + } + if (config('app.receipt_ocr_debug_store')) { + $meta['store_guess_debug'] = $this->buildStoreGuessDebug($text, $store); + } + + return [ + 'ok' => true, + 'text' => $text, + 'store_name' => $store, + 'receipt_date' => $receiptDate, + 'total_decimal' => $total, + 'meta' => $meta, + ]; + } finally { + foreach ($cleanup as $tmp) { + if (is_string($tmp) && $tmp !== '' && is_file($tmp)) { + @unlink($tmp); + } + } + if (is_file($outBase.'.txt')) { + @unlink($outBase.'.txt'); + } + } + } + + /** + * @return array{path: string, cleanup: list, error?: null, hint?: null}|array{path: null, cleanup: list, error: string, hint: string} + */ + private function prepareInputFile(string $absolutePath): array + { + $cleanup = []; + if (! is_file($absolutePath)) { + return [ + 'path' => null, + 'cleanup' => [], + 'error' => 'file_missing', + 'hint' => 'Hochgeladene Datei wurde nicht gefunden.', + ]; + } + + $ext = strtolower(pathinfo($absolutePath, PATHINFO_EXTENSION)); + + if ($ext === 'pdf') { + $png = $this->convertWithImagick($absolutePath, 'pdf'); + if ($png === null) { + return [ + 'path' => null, + 'cleanup' => [], + 'error' => 'pdf_not_supported', + 'hint' => 'PDF konnte nicht in ein Bild umgewandelt werden. 
Bitte Kassabon als JPG/PNG fotografieren oder PHP-Imagick + Ghostscript installieren.', + ]; + } + $cleanup[] = $png; + + return ['path' => $png, 'cleanup' => $cleanup]; + } + + if (in_array($ext, ['heic', 'heif'], true)) { + $png = $this->convertWithImagick($absolutePath, 'heic'); + if ($png === null) { + return [ + 'path' => null, + 'cleanup' => [], + 'error' => 'heic_not_supported', + 'hint' => 'HEIC/HEIF wird hier nicht unterstuetzt. Bitte am Handy auf JPG umstellen oder konvertieren.', + ]; + } + $cleanup[] = $png; + + return ['path' => $png, 'cleanup' => $cleanup]; + } + + return ['path' => $absolutePath, 'cleanup' => $cleanup]; + } + + private function convertWithImagick(string $absolutePath, string $kind): ?string + { + if (! extension_loaded('imagick')) { + return null; + } + + try { + $imagickClass = 'Imagick'; + if (! class_exists($imagickClass)) { + return null; + } + /** @var object $im */ + $im = new $imagickClass(); + if ($kind === 'pdf') { + $im->setResolution(200, 200); + $im->readImage($absolutePath.'[0]'); + } else { + $im->readImage($absolutePath); + } + $im->setImageFormat('png'); + $png = rtrim(sys_get_temp_dir(), DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR.'ocrimg_'.uniqid('', true).'.png'; + $im->writeImage($png); + $im->clear(); + $im->destroy(); + + return is_file($png) ? $png : null; + } catch (\Throwable) { + return null; + } + } + + /** + * Leichte Vorverarbeitung gegen schiefe/kontrastarme Bons. + * + * @return array{path: string, meta: array}|null + */ + private function preprocessImageForOcr(string $sourcePath): ?array + { + if (! extension_loaded('imagick') || ! is_file($sourcePath)) { + return null; + } + + $ext = strtolower((string) pathinfo($sourcePath, PATHINFO_EXTENSION)); + if (! in_array($ext, ['jpg', 'jpeg', 'png', 'webp', 'bmp', 'tif', 'tiff', 'gif'], true)) { + return null; + } + + try { + $imagickClass = 'Imagick'; + if (! 
class_exists($imagickClass)) { + return null; + } + /** @var object $im */ + $im = new $imagickClass(); + $im->readImage($sourcePath); + $im->setImageFormat('png'); + $im->stripImage(); + + if (method_exists($im, 'autoOrient')) { + $im->autoOrient(); + } + + // Typischer OCR-Boost: graustufen + normalisieren + leicht schaerfen + deskew. + $im->setImageColorspace(2); + $im->normalizeImage(); + $im->sharpenImage(0, 1.0); + $im->deskewImage(40); + + $png = rtrim(sys_get_temp_dir(), DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR.'ocrprep_'.uniqid('', true).'.png'; + $im->writeImage($png); + $im->clear(); + $im->destroy(); + + if (! is_file($png)) { + return null; + } + + return [ + 'path' => $png, + 'meta' => [ + 'applied' => true, + 'pipeline' => ['auto_orient', 'grayscale', 'normalize', 'sharpen', 'deskew'], + ], + ]; + } catch (\Throwable) { + return null; + } + } + + /** + * Erste funktionierende Tesseract-Binary: konfiguriert, dann PATH, dann typische Windows-Pfade. + */ + private function resolveBinary(): ?string + { + foreach ($this->candidateBinaries() as $path) { + if ($this->binaryResponds($path)) { + return $path; + } + } + + return null; + } + + /** + * @return list + */ + private function candidateBinaries(): array + { + $candidates = []; + $configured = config('app.receipt_ocr_bin'); + if (is_string($configured) && trim($configured) !== '') { + $candidates[] = trim($configured); + } + $candidates[] = 'tesseract'; + if (DIRECTORY_SEPARATOR === '\\' || PHP_OS_FAMILY === 'Windows') { + $candidates[] = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'; + $candidates[] = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'; + } + + return array_values(array_unique($candidates, SORT_STRING)); + } + + private function binaryResponds(string $bin): bool + { + $command = sprintf('%s --version 2>&1', $this->escapeExecutable($bin)); + $output = []; + $exitCode = 0; + @exec($command, $output, $exitCode); + + return $exitCode === 0; + } + + private function 
escapeExecutable(string $bin): string + { + if (str_contains($bin, ' ')) { + return '"'.str_replace('"', '\"', $bin).'"'; + } + + return escapeshellcmd($bin); + } + + /** + * Hilfe beim Debug: Zeilen (v. a. Zeile 2) vs. Keyword "spar" und Ergebnis von guessStoreName. + * + * @return array + */ + private function buildStoreGuessDebug(string $text, ?string $chosenStore): array + { + $lines = preg_split('/\R+/', $text) ?: []; + $headerLines = array_slice($lines, 0, 25); + $rows = []; + foreach ($headerLines as $idx => $rawLine) { + $n = $idx + 1; + $candidate = trim($rawLine); + $lower = mb_strtolower($candidate); + $rows[] = [ + 'line_no' => $n, + 'raw' => $candidate, + 'lower' => $lower, + 'normalized_for_match' => $this->normalizeForStoreMatch($lower), + 'matches_spar_keyword' => $this->lineContainsStoreKeyword($lower, 'spar'), + ]; + } + + $line2 = $rows[1] ?? null; + + return [ + 'chosen_store' => $chosenStore, + 'spar_probe_keyword' => 'spar', + 'line_2' => $line2, + 'line_2_matches_spar' => is_array($line2) ? ($line2['matches_spar_keyword'] ?? false) : null, + 'header_lines' => $rows, + 'store_keywords_order' => array_map( + fn (array $r) => $r['normalized'], + $this->storesForReceiptMatching() + ), ]; } private function guessStoreName(string $text): ?string { - foreach (preg_split('/\R+/', $text) ?: [] as $line) { + $lines = preg_split('/\R+/', $text) ?: []; + $headerLines = array_slice($lines, 0, 25); + $storeKeywords = $this->storesForReceiptMatching(); + + foreach ($headerLines as $line) { + $candidate = trim($line); + if ($candidate === '') { + continue; + } + $normalized = mb_strtolower($candidate); + foreach ($storeKeywords as $row) { + if (! 
$this->lineContainsStoreKeyword($normalized, $row['normalized'])) { + continue; + } + $cleaned = $this->cleanStoreHeaderLine($candidate); + if ($cleaned !== '' && mb_strlen($cleaned) <= 80) { + return $cleaned; + } + + return $row['display_name']; + } + } + + foreach ($headerLines as $line) { $candidate = trim($line); if ($candidate === '' || mb_strlen($candidate) < 2 || mb_strlen($candidate) > 60) { continue; @@ -100,9 +418,109 @@ class ReceiptOcrService return null; } + /** + * Geschäftsnamen aus der Tabelle `stores`; laengere Namen zuerst (interspar vor spar). + * Wenn noch leer: kleine Fallback-Liste. + * + * @return list + */ + private function storesForReceiptMatching(): array + { + $fromDb = Store::query() + ->get(['name', 'normalized_name']) + ->sortByDesc(fn (Store $s) => mb_strlen((string) $s->normalized_name)) + ->values(); + + $out = []; + $seen = []; + foreach ($fromDb as $store) { + $norm = trim((string) $store->normalized_name); + if ($norm === '') { + continue; + } + $seen[$norm] = true; + $out[] = [ + 'normalized' => $norm, + 'display_name' => trim((string) $store->name), + ]; + } + + // Immer mit robusten Basis-Ketten auffuellen, nicht nur bei leerer DB-Liste. + foreach (['interspar', 'eurospar', 'spar', 'lidl', 'hofer', 'billa', 'penny', 'dm'] as $slug) { + if (isset($seen[$slug])) { + continue; + } + $out[] = [ + 'normalized' => $slug, + 'display_name' => mb_strtoupper($slug), + ]; + } + + usort($out, fn ($a, $b) => mb_strlen($b['normalized']) <=> mb_strlen($a['normalized'])); + + return $out; + } + + /** + * Verrauschte Kopfzeile (z. B. "eS SPAR SM Göfis") fuer Anzeige bereinigen. + */ + private function cleanStoreHeaderLine(string $line): string + { + $line = trim($line); + $line = preg_replace('/^[\s\)\(]+/u', '', $line) ?? $line; + $line = preg_replace('/^(e[sS]|ES)\s+/u', '', $line) ?? $line; + $line = preg_replace('/^(ne|Ne)\s+/u', '', $line) ?? 
$line; + + return trim($line); + } + + private function lineContainsStoreKeyword(string $normalizedLower, string $storeKeyword): bool + { + $line = $this->normalizeForStoreMatch($normalizedLower); + $keyword = $this->normalizeForStoreMatch($storeKeyword); + $kw = preg_quote($keyword, '/'); + if (preg_match('/(? 's', + '$' => 's', + '§' => 's', + '0' => 'o', + '1' => 'i', + '|' => 'i', + ]); + + if (preg_match('/(?, from_cache:bool, fetched_at:string|null} + */ + public function search(string $query, int $limit = 5, int $ttlMinutes = 720): array + { + $query = trim($query); + if ($query === '') { + return [ + 'results' => [], + 'from_cache' => false, + 'fetched_at' => null, + ]; + } + + $cacheKey = 'store_search:spar:'.md5(mb_strtolower($query).'|'.$limit); + $cached = Cache::get($cacheKey); + if (is_array($cached)) { + return [ + 'results' => $cached['results'] ?? [], + 'from_cache' => true, + 'fetched_at' => $cached['fetched_at'] ?? null, + ]; + } + + try { + $response = Http::timeout(10) + ->acceptJson() + ->withHeaders([ + 'User-Agent' => 'Mozilla/5.0 (compatible; EinkaufslisteBot/1.0)', + ]) + ->get('https://bfs-geo.spar-ics.com/fact-finder/rest/v5/search/products_at', [ + 'query' => $query, + 'page' => 1, + 'hitsPerPage' => $limit, + 'sid' => $this->sessionId(), + 'useAsn' => 'false', + 'marketId' => 'NATIONAL', + 'showPermutedSearchParams' => 'true', + ]); + + if (! 
$response->successful()) { + return [ + 'results' => [], + 'from_cache' => false, + 'fetched_at' => null, + ]; + } + + $results = $this->mapProductsFromApi($response->json(), $limit); + $payload = [ + 'results' => $results, + 'fetched_at' => Carbon::now()->toIso8601String(), + ]; + Cache::put($cacheKey, $payload, now()->addMinutes($ttlMinutes)); /* only successful responses reach this point, so failures are never cached */ + + return [ + 'results' => $results, + 'from_cache' => false, + 'fetched_at' => $payload['fetched_at'], + ]; + } catch (\Throwable) { /* best effort: any failure yields an empty, uncached result instead of propagating */ + return [ + 'results' => [], + 'from_cache' => false, + 'fetched_at' => null, + ]; + } + } + + /** + * Maps the `hits` of a decoded API payload to result rows, stopping once $limit rows are collected. + * Hits without a `masterValues` array or with an empty combined name1/name2 are skipped. + * + * @return list<array{name: string, price: string|null, url: string|null}> + */ + private function mapProductsFromApi(mixed $payload, int $limit): array + { + if (! is_array($payload)) { + return []; + } + + $hits = $payload['hits'] ?? []; + if (! is_array($hits)) { + return []; + } + + $results = []; + foreach ($hits as $hit) { + if (! is_array($hit) || ! isset($hit['masterValues']) || ! is_array($hit['masterValues'])) { + continue; + } + + $master = $hit['masterValues']; + $name = trim((string) (($master['name1'] ?? '').' '.($master['name2'] ?? ''))); + if ($name === '') { + continue; + } + + $slug = isset($master['slug']) ? trim((string) $master['slug']) : ''; + $url = $slug !== '' ? 'https://www.spar.at/produkt/'.urlencode($slug) : null; + + $price = null; + $geo = $master['geoInformation'] ?? null; + if (is_array($geo) && isset($geo[0]['geoValues']) && is_array($geo[0]['geoValues'])) { /* only the first geoInformation entry is read */ + $geoValues = $geo[0]['geoValues']; + $basePrice = $geoValues['calculatedPrice'] ?? $geoValues['basePrice'] ?? null; /* calculatedPrice wins when it is set and non-null */ + if ($basePrice !== null && is_numeric((string) $basePrice)) { + $price = number_format((float) $basePrice, 2, ',', '.').' 
EUR'; + } + } + + $results[] = [ + 'name' => $name, + 'price' => $price, + 'url' => $url, + ]; + + if (count($results) >= $limit) { /* stop as soon as $limit rows are collected */ + break; + } + } + + return $results; + } + + private function sessionId(): string + { + return base64_encode(uniqid('einkauf_', true)); /* new value on every call: prefixed uniqid with extra entropy, base64-encoded */ + } +} diff --git a/config/app.php b/config/app.php index 423eed5..ff6c577 100644 --- a/config/app.php +++ b/config/app.php @@ -123,4 +123,22 @@ return [ 'store' => env('APP_MAINTENANCE_STORE', 'database'), ], + /* + |-------------------------------------------------------------------------- + | Receipt OCR (Tesseract) + |-------------------------------------------------------------------------- + | + | Optional full path to the tesseract binary, in case it is not on the PATH of + | PHP / the web server (typical on Windows/WAMP). + | + */ + + 'receipt_ocr_bin' => env('RECEIPT_OCR_BIN'), + + /* + | When true: raw_meta contains store_guess_debug (lines vs. Spar) for the OCR run. + */ + + 'receipt_ocr_debug_store' => (bool) env('RECEIPT_OCR_DEBUG_STORE', false), + ]; diff --git a/resources/views/receipt-scans/index.blade.php b/resources/views/receipt-scans/index.blade.php index 47eec8a..5e9f207 100644 --- a/resources/views/receipt-scans/index.blade.php +++ b/resources/views/receipt-scans/index.blade.php @@ -81,6 +81,17 @@
+ @if(! empty($scan->raw_meta['validated_at'])) + @php + $validatedCount = count($scan->raw_meta['validated_items'] ?? []); + $validatedAtFormatted = \Illuminate\Support\Carbon::parse($scan->raw_meta['validated_at'])->format('d.m.Y H:i'); + @endphp +
+ Bereits in die Liste uebernommen: + {{ $validatedCount }} Position{{ $validatedCount === 1 ? '' : 'en' }} + · {{ $validatedAtFormatted }} +
+ @endif
@csrf @method('PATCH') @@ -103,10 +114,135 @@
+ @if(! $scan->ocr_text && is_string($scan->raw_meta['hint'] ?? null) && $scan->raw_meta['hint'] !== '') +
+ {{ $scan->raw_meta['hint'] }} +
+ @endif +
+
+ @csrf + +
+ @if($ocrAvailable && ! $scan->ocr_text) + Ohne neues Foto; z. B. nach Tesseract-Pfad oder OCR-Update. + @endif +
OCR-Text anzeigen
{{ $scan->ocr_text ?: 'Kein OCR-Text vorhanden.' }}
+ @if(! empty($scan->raw_meta['store_guess_debug'])) +
+ Debug: Geschaeft (Zeile 2 vs. Spar) +
{{ json_encode($scan->raw_meta['store_guess_debug'], JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE) }}
+
+ @endif + @php + $suggestions = collect($scan->item_suggestions ?? []); + $certainCount = $suggestions->where('is_uncertain', false)->count(); + $uncertainCount = $suggestions->where('is_uncertain', true)->count(); + $appliedLowerKeys = collect($scan->raw_meta['validated_items'] ?? []) + ->map(fn ($r) => mb_strtolower(trim($r['label'] ?? ''))) + ->filter() + ->all(); + $appliedSet = array_flip($appliedLowerKeys); + @endphp +
+

+ Erkannte Artikel (Vorschlaege): {{ $certainCount }} + @if($uncertainCount > 0) + · Unsicher: {{ $uncertainCount }} + @endif +

+ @if($suggestions->isNotEmpty()) +

+ Pro Zeile Haken setzen, um sie als erledigten Eintrag zu uebernehmen; ohne Haken wird die Zeile ignoriert. + Unsichere Treffer sind markiert und standardmaessig nicht angehakt. + Bei Einrueckung (z. B. Biskotten, darunter „2 x 1,49“): Menge und Einzelpreis werden erkannt; Gesamt daneben ist nur Kontrolle. + Buchstaben A/B/E hinter dem Betrag sind nur MwSt-Kennzeichen (kein Teil des Preises). +

+
+ @csrf +
+ @foreach($suggestions as $sug) + @php + $label = is_array($sug) ? ($sug['label'] ?? '') : (string) $sug; + $priceRaw = is_array($sug) ? ($sug['price_raw'] ?? '') : ''; + $qtyRaw = is_array($sug) ? ($sug['quantity_raw'] ?? '') : ''; + $isUncertain = (bool) (is_array($sug) ? ($sug['is_uncertain'] ?? false) : false); + $rowIndex = $loop->index; + $listKey = mb_strtolower(trim($label)); + $listStatus = $listProductLookup[$listKey] ?? null; + $wasAppliedFromReceipt = $listKey !== '' && isset($appliedSet[$listKey]); + @endphp +
+ +
+ @if($isUncertain) + unsicher + @endif + @if($wasAppliedFromReceipt) + uebernommen + @elseif($listStatus === 'open') + Liste offen + @elseif($listStatus === 'done') + Liste erledigt + @endif +
+ + + +
+ @endforeach +
+
+ +
+
+ @else +

+ Keine sicheren Artikel erkannt. Du kannst OCR-Text manuell pruefen. +

+ @endif +
@if(($scan->raw_meta['error'] ?? null) === 'ocr_unavailable')

OCR war beim Upload nicht verfuegbar.

@endif diff --git a/routes/web.php b/routes/web.php index 6653ef5..3f9fb21 100644 --- a/routes/web.php +++ b/routes/web.php @@ -28,6 +28,8 @@ Route::middleware(['auth', 'verified'])->group(function () { Route::get('/receipt-scans', [ReceiptScanController::class, 'index'])->name('receipt-scans.index'); Route::post('/receipt-scans', [ReceiptScanController::class, 'store'])->name('receipt-scans.store'); Route::patch('/receipt-scans/{receiptScan}', [ReceiptScanController::class, 'update'])->name('receipt-scans.update'); + Route::post('/receipt-scans/{receiptScan}/ocr-reprocess', [ReceiptScanController::class, 'reprocessOcr'])->name('receipt-scans.ocr-reprocess'); + Route::post('/receipt-scans/{receiptScan}/apply-items', [ReceiptScanController::class, 'applyItems'])->name('receipt-scans.apply-items'); }); Route::middleware('auth')->group(function () {