From 47e44ec5e94e849f2b6b628028b6c50f0c0eadc5 Mon Sep 17 00:00:00 2001
From: ergosteur <1992147+ergosteur@users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:03:54 -0500
Subject: [PATCH] feat: improve archive parsing, add .json.xz support, and fix
profile pic display
---
.gitignore | 2 +
GEMINI.md | 57 +++++++++++++++++++
package-lock.json | 12 +++-
package.json | 3 +-
src/App.tsx | 137 ++++++++++++++++++++++++++++++++--------------
5 files changed, 169 insertions(+), 42 deletions(-)
create mode 100644 GEMINI.md
diff --git a/.gitignore b/.gitignore
index 5a86d2a..f60d81c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,5 @@ coverage/
*.log
.env*
!.env.example
+_sample-archives
+_gemini-plans
diff --git a/GEMINI.md b/GEMINI.md
new file mode 100644
index 0000000..eb36080
--- /dev/null
+++ b/GEMINI.md
@@ -0,0 +1,57 @@
+# GEMINI.md
+
+## Project Overview
+
+**InstaArchive** is a high-performance, React-based Progressive Web App (PWA) designed to browse and explore archived Instagram data with a native-feeling interface. It allows users to load their local Instagram archive directories (either official Instagram exports or Instaloader format) and browse posts, reels, and stories in a modern, responsive grid view.
+
+### Key Features
+- **Local Archive Loading:** Uses the `webkitdirectory` API to scan and process local files securely on the client-side.
+- **Support for Multiple Formats:** Recognizes official Instagram export structures as well as Instaloader's file-naming conventions (matched via regular expressions).
+- **Dynamic Media Grid:** Customizable grid layouts (1:1 and 3:4 aspect ratios) with adjustable offsets ("bumps") for aesthetic alignment.
+- **Story Viewer:** Native-like story experience with progress bars, automated playback, and touch/click navigation.
+- **Post Detail Modal:** Comprehensive view for posts with media carousels, captions, and seamless navigation between posts.
+- **Video Support:** Automatic video thumbnail generation and playback.
+- **PWA Ready:** Built with `vite-plugin-pwa` for offline capabilities and a standalone application experience.
+
+### Main Technologies
+- **Frontend:** React 19, Vite, TypeScript
+- **Styling:** Tailwind CSS (v4)
+- **Icons:** Lucide React
+- **Animations:** Framer Motion (`motion/react`)
+- **Utility:** date-fns, clsx, tailwind-merge
+- **PWA:** vite-plugin-pwa
+
+*Note: While `better-sqlite3`, `express`, and `@google/genai` are listed in `package.json`, the current core application logic is entirely frontend-driven.*
+
+## Building and Running
+
+### Prerequisites
+- Node.js (latest LTS recommended)
+- npm or yarn
+
+### Commands
+- `npm install`: Install project dependencies.
+- `npm run dev`: Start the local development server on port 3000.
+- `npm run build`: Generate the production-ready build in the `dist` folder.
+- `npm run preview`: Locally preview the production build.
+- `npm run lint`: Execute TypeScript type-checking (`tsc --noEmit`).
+- `npm run clean`: Remove the `dist` directory.
+
+### Environment Variables
+- `GEMINI_API_KEY`: Required only for AI-integrated features; the core application logic does not currently use it.
+- `DISABLE_HMR`: If set to `true` (standard in AI Studio environments), Hot Module Replacement is disabled to prevent flickering during agent-driven edits.
+
+## Development Conventions
+
+- **Component Architecture:** Functional components with modern hooks. All main application logic currently resides in `src/App.tsx`.
+- **Styling Strategy:** Utility-first CSS using Tailwind CSS v4. Styles are largely co-located with components.
+- **Type Safety:** Strict TypeScript usage for all components and utility functions.
+- **Iconography:** Consistent use of `lucide-react` for all UI icons.
+- **Animations:** Smooth transitions and gestures powered by `motion/react`.
+- **File Handling:** Privacy-focused client-side scanning of archive directories using browser-native APIs.
+- **Project Structure:**
+ - `src/App.tsx`: Root application component containing the core application logic.
+ - `src/main.tsx`: React DOM mounting.
+ - `src/index.css`: Global styles and Tailwind imports.
+ - `vite.config.ts`: Project build and PWA configuration.
+ - `metadata.json`: Project description for external integration tools.
diff --git a/package-lock.json b/package-lock.json
index 0a6daca..2968a55 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -21,7 +21,8 @@
"react": "^19.0.0",
"react-dom": "^19.0.0",
"tailwind-merge": "^3.5.0",
- "vite": "^6.2.0"
+ "vite": "^6.2.0",
+ "xz-decompress": "^0.2.3"
},
"devDependencies": {
"@types/express": "^4.17.21",
@@ -9192,6 +9193,15 @@
}
}
},
+ "node_modules/xz-decompress": {
+ "version": "0.2.3",
+ "resolved": "https://registry.npmjs.org/xz-decompress/-/xz-decompress-0.2.3.tgz",
+ "integrity": "sha512-O8v6HG8T0PrKBcpyWA13GkSYWFvncwzuzcLx5A7++l3HsE3atmoetXjIxrZ/JV/nbvSZ7WS4+3XvREZuVn+rEA==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=16"
+ }
+ },
"node_modules/yallist": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz",
diff --git a/package.json b/package.json
index e066199..4cf002d 100644
--- a/package.json
+++ b/package.json
@@ -24,7 +24,8 @@
"react": "^19.0.0",
"react-dom": "^19.0.0",
"tailwind-merge": "^3.5.0",
- "vite": "^6.2.0"
+ "vite": "^6.2.0",
+ "xz-decompress": "^0.2.3"
},
"devDependencies": {
"@types/express": "^4.17.21",
diff --git a/src/App.tsx b/src/App.tsx
index 556c874..69486e0 100644
--- a/src/App.tsx
+++ b/src/App.tsx
@@ -22,6 +22,8 @@ import { motion, AnimatePresence } from 'motion/react';
import { clsx, type ClassValue } from 'clsx';
import { twMerge } from 'tailwind-merge';
import { format, parseISO } from 'date-fns';
+// @ts-ignore
+import { XzReadableStream } from 'xz-decompress';
// --- Utilities ---
function cn(...inputs: ClassValue[]) {
@@ -332,7 +334,8 @@ const PostModal = ({
onNextPost,
onPrevPost,
hasNextPost,
- hasPrevPost
+ hasPrevPost,
+ profilePic
}: {
post: Post;
onClose: () => void;
@@ -341,6 +344,7 @@ const PostModal = ({
onPrevPost?: () => void;
hasNextPost?: boolean;
hasPrevPost?: boolean;
+ profilePic: string | null;
}) => {
const [currentIndex, setCurrentIndex] = useState(0);
const [isFullView, setIsFullView] = useState(initialFullView);
@@ -562,8 +566,12 @@ const PostModal = ({
-
-
{post.username[0]}
+
+ {profilePic ? (
+

+ ) : (
+
{post.username[0]}
+ )}
@@ -574,8 +582,12 @@ const PostModal = ({
-
- {post.username[0]}
+
+ {profilePic ? (
+

+ ) : (
+
{post.username[0]}
+ )}
{post.username}
@@ -681,12 +693,24 @@ export default function App() {
await new Promise(resolve => setTimeout(resolve, 100));
+ const parseXZFile = async (file: File) => { // Decompresses an XZ-compressed file and parses the result as JSON; resolves to null if anything in the try block throws.
+ try {
+ const decompressedStream = new XzReadableStream(file.stream()); // feed the file's byte stream through the XZ decoder
+ const response = new Response(decompressedStream); // wrap the decoded stream in a Response so its body can be read via .json()
+ return await response.json();
+ } catch (e) { // reached for any failure above, including a rejected response.json(), not only decompression errors
+ console.error("Error decompressing XZ file:", file.name, e);
+ return null; // null signals failure to the caller instead of throwing
+ }
+ };
+
try {
const postsMap = new Map
>();
const mediaFilesMap = new Map();
// Format 1: Instagram Export (e.g., 2021-01-01_username - ID - 1.jpg)
- const exportRegex = /^(\d{4}-\d{2}-\d{2})_(.+?) - ([a-zA-Z0-9_-]+)(?: - (\d+))?(?: - (story))?\.(.+)$/;
+ // Updated to be slightly more permissive with the shortcode/ID part
+ const exportRegex = /^(\d{4}-\d{2}-\d{2})_(.+?) - (.+?)(?: - (\d+))?(?: - (story))?\.(.+)$/;
// Format 2: Instaloader (e.g., 2017-03-31_12-42-56_UTC.jpg or 2020-12-05_22-11-27_UTC_1.jpg)
const instaloaderRegex = /^(\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}_UTC)(?:_(\d+))?(?:_(story))?\.(.+)$/;
@@ -698,10 +722,13 @@ export default function App() {
const productType = obj.product_type || "";
return (
obj.is_story === true ||
+ obj.is_reel_media === true ||
typeName.includes('Story') ||
obj.audience === "MediaAudience.DEFAULT" ||
nodeType === "StoryItem" ||
- productType === "story"
+ productType === "story" ||
+ typeName === "GraphStoryVideo" ||
+ typeName === "GraphStoryImage"
);
};
@@ -709,27 +736,26 @@ export default function App() {
let format: 'export' | 'instaloader' | 'json' | 'unknown' = 'unknown';
let jsonFiles: File[] = [];
- // First pass: detect format and collect files
+ // First pass: detect format, username, and collect files
for (let i = 0; i < files.length; i++) {
const file = files[i];
const lowerName = file.name.toLowerCase();
- // Check for official JSON format or profile JSON
- if (lowerName.endsWith('.json')) {
+ // Check for official JSON format or profile JSON (including .xz)
+ if (lowerName.endsWith('.json') || lowerName.endsWith('.json.xz')) {
jsonFiles.push(file);
if (lowerName.includes('posts_1') || lowerName.includes('reels_1') || lowerName.includes('stories_1')) {
format = 'json';
} else if (format === 'unknown' && (lowerName.includes('story') || lowerName.includes('post'))) {
- // Likely Instaloader or other JSON-per-post format
format = 'json';
}
continue;
}
- // Check for profile pic in Instaloader format
+ // Check for profile pic in Instaloader format or generic username.jpg
if (lowerName.includes('_profile_pic.jpg')) {
setProfilePic(URL.createObjectURL(file));
- if (file.webkitRelativePath) {
+ if (!detectedUsername && file.webkitRelativePath) {
const parts = file.webkitRelativePath.split('/');
if (parts.length > 1) {
detectedUsername = parts[0];
@@ -742,8 +768,10 @@ export default function App() {
const exportMatch = file.name.match(exportRegex);
if (exportMatch) {
- detectedUsername = exportMatch[2];
- setUsername(detectedUsername);
+ if (!detectedUsername) {
+ detectedUsername = exportMatch[2];
+ setUsername(detectedUsername);
+ }
format = 'export';
}
@@ -761,7 +789,6 @@ export default function App() {
// Store all media files for JSON format lookup
if (['jpg', 'jpeg', 'png', 'webp', 'mp4'].some(ext => lowerName.endsWith(ext))) {
- // Store by relative path or just name
const key = file.webkitRelativePath || file.name;
mediaFilesMap.set(key, file);
}
@@ -769,12 +796,15 @@ export default function App() {
console.log(`Detected format: ${format}, Username: ${detectedUsername}`);
- if (format === 'json') {
- // Handle JSON format (Official Instagram Export)
+ if (format === 'json' || format === 'instaloader') {
+ // Handle JSON format (Official Instagram Export or Instaloader)
for (const jsonFile of jsonFiles) {
try {
- const content = await jsonFile.text();
- const data = JSON.parse(content);
+ const data = jsonFile.name.endsWith('.xz')
+ ? await parseXZFile(jsonFile)
+ : JSON.parse(await jsonFile.text());
+
+ if (!data) continue;
// Check if it's a profile JSON
if (data.node && (data.instaloader?.node_type === 'Profile' || data.node.__typename === 'User')) {
@@ -790,8 +820,6 @@ export default function App() {
continue;
}
- // Official Instagram JSON structure is usually an array of objects
- // Instaloader JSON is usually a single object
const items = Array.isArray(data) ? data : (data.media || [data]);
const isStoriesFile = jsonFile.name.toLowerCase().includes('stories');
@@ -808,6 +836,8 @@ export default function App() {
checkIsStory(item) ||
checkIsStory(item.node) ||
checkIsStory(data.instaloader) ||
+ checkIsStory(item.node?.iphone_struct) ||
+ checkIsStory(item.iphone_struct) ||
(item.media && Array.isArray(item.media) && item.media.some((m: any) => checkIsStory(m)));
const post: Partial = {
@@ -864,7 +894,16 @@ export default function App() {
if (matchedFile) {
const url = URL.createObjectURL(matchedFile);
const type = matchedFile.name.toLowerCase().endsWith('mp4') ? 'video' : 'image';
- post.media!.push({ name: matchedFile.name, url, type, index: mIdx + 1 });
+ // Deduplication check logic
+ const existingMedia = post.media!.find(media => media.index === mIdx + 1);
+ if (existingMedia) {
+ if (type === 'video' && existingMedia.type === 'image') {
+ // Replace image with video
+ post.media = post.media!.map(media => media.index === mIdx + 1 ? { name: matchedFile!.name, url, type, index: mIdx + 1 } : media);
+ }
+ } else {
+ post.media!.push({ name: matchedFile.name, url, type, index: mIdx + 1 });
+ }
}
});
@@ -876,7 +915,9 @@ export default function App() {
console.error("Error parsing JSON file:", jsonFile.name, e);
}
}
- } else {
+ }
+
+ if (format !== 'json') {
// Handle Regex formats (Export or Instaloader)
let matchedCount = 0;
const CHUNK_SIZE = 100;
@@ -888,7 +929,8 @@ export default function App() {
const file = files[j];
const lowerName = file.name.toLowerCase();
- if (format === 'export' && detectedUsername && lowerName === `${detectedUsername.toLowerCase()}.jpg`) {
+ // Check for potential profile pic (username.jpg)
+ if (detectedUsername && lowerName === `${detectedUsername.toLowerCase()}.jpg`) {
setProfilePic(URL.createObjectURL(file));
continue;
}
@@ -914,8 +956,11 @@ export default function App() {
const match = file.name.match(instaloaderRegex);
if (!match) continue;
const [_, postIdMatch, indexStrMatch, storyMatch, extMatch] = match;
+ // Group 1 is the consistent timestamp part (e.g. 2022-03-31_14-56-28_UTC)
+ // This is the correct ID to group .jpg, .mp4, and .json.xz files.
postId = postIdMatch;
- date = postId.split('_')[0];
+
+ date = postIdMatch.split('_')[0];
index = indexStrMatch ? parseInt(indexStrMatch, 10) : 1;
if (storyMatch) isStory = true;
ext = extMatch;
@@ -937,7 +982,6 @@ export default function App() {
};
postsMap.set(postId, post);
} else if (isStory) {
- // Update isStory flag if any file associated with this post indicates it's a story
post.isStory = true;
}
@@ -945,39 +989,51 @@ export default function App() {
if (lowerExt === 'txt') {
const text = await file.text();
post.caption = text;
- } else if (lowerExt === 'json') {
+ } else if (lowerExt === 'json' || lowerName.endsWith('.json.xz')) {
try {
- const content = await file.text();
- const data = JSON.parse(content);
+ const data = lowerName.endsWith('.xz')
+ ? await parseXZFile(file)
+ : JSON.parse(await file.text());
- // Extract caption
+ if (!data) continue;
+
const node = data.node || data;
+ const iphone = node.iphone_struct || {};
const captionText = node.edge_media_to_caption?.edges?.[0]?.node?.text ||
node.caption?.text ||
- node.iphone_struct?.caption?.text || "";
+ iphone.caption?.text || "";
if (captionText) post.caption = captionText;
- // Update isStory if JSON confirms it
- if (checkIsStory(data) || checkIsStory(data.node) || checkIsStory(data.instaloader)) {
+ if (checkIsStory(data) || checkIsStory(node) || checkIsStory(data.instaloader) || checkIsStory(iphone)) {
post.isStory = true;
}
- // Profile info check in case it's a profile JSON
+ // Check if this JSON contains profile info
if (data.node && (data.instaloader?.node_type === 'Profile' || data.node.__typename === 'User')) {
const n = data.node;
- const iphone = n.iphone_struct || {};
+ const iph = n.iphone_struct || {};
setUsername(n.username || '');
setFullName(n.full_name || '');
- setBio(n.biography || iphone.biography || '');
+ setBio(n.biography || iph.biography || '');
setExternalUrl(n.external_url || '');
- setFollowerCount(n.edge_followed_by?.count || iphone.follower_count || 0);
- setFollowingCount(n.edge_follow?.count || iphone.following_count || 0);
+ setFollowerCount(n.edge_followed_by?.count || iph.follower_count || 0);
+ setFollowingCount(n.edge_follow?.count || iph.following_count || 0);
}
} catch (e) {}
} else if (['jpg', 'jpeg', 'png', 'webp', 'mp4'].includes(lowerExt)) {
const url = URL.createObjectURL(file);
const type = lowerExt === 'mp4' ? 'video' : 'image';
- post.media!.push({ name: file.name, url, type, index });
+
+ const existingMedia = post.media!.find(m => m.index === index);
+ if (existingMedia) {
+ // If we have an image and find a video for the same index, replace it
+ if (type === 'video' && existingMedia.type === 'image') {
+ post.media = post.media!.map(m => m.index === index ? { name: file.name, url, type, index } : m);
+ }
+ // If we have a video and find an image, do nothing (keep video)
+ } else {
+ post.media!.push({ name: file.name, url, type, index });
+ }
}
}
await new Promise(resolve => setTimeout(resolve, 0));
@@ -1325,6 +1381,7 @@ export default function App() {
onPrevPost={onPrevPost}
hasNextPost={postIndex < filteredPosts.length - 1}
hasPrevPost={postIndex > 0}
+ profilePic={profilePic}
/>
)}