diff --git a/Sources/CodeMapper/CFamilyDeclarationParser.swift b/Sources/CodeMapper/CFamilyDeclarationParser.swift new file mode 100644 index 0000000..198b86b --- /dev/null +++ b/Sources/CodeMapper/CFamilyDeclarationParser.swift @@ -0,0 +1,186 @@ +import Foundation + +/// Declaration parsing for Objective-C / Objective-C++ / C / C++ sources +/// (driven by clangd via sourcekit-lsp's C-family multiplexing). Mirrors +/// `SwiftDeclarationParser`'s contract but speaks C-family grammar: +/// `@interface`/`@implementation`/`@protocol`/`@property`, `-`/`+` method +/// declarations, plain functions, structs, enums, typedefs. +struct CFamilyDeclarationParser: DeclarationParser { + func extractSignature(sourceBytes: [UInt8], startOffset: Int) -> String { + let end = declarationEnd(sourceBytes: sourceBytes, startOffset: startOffset) + return collapseWhitespace(sourceBytes[startOffset.. Int { + let prefix = collapseWhitespace(sourceBytes[startOffset.."): if depth > 0 { depth -= 1 } + case UInt8(ascii: ";"), UInt8(ascii: "{"): + if depth == 0 && !isInterfaceHeader { return i } + case UInt8(ascii: "\n"): + if depth == 0 && isInterfaceHeader { return i } + default: break + } + i += 1 + } + return i + } + + func parse(_ sig: String, lspKind: Int, name: String) -> ParsedDecl { + var result = ParsedDecl( + kind: classify(sig, lspKind: lspKind), + accessLevel: "", // C-family headers are public-by-inclusion; no access modifiers to report + isAsync: false, + isThrows: false, + isExtension: false, + returnType: nil, + superclass: nil, + conformances: [] + ) + + switch result.kind { + case .class, .protocol: + (result.superclass, result.conformances) = parseInheritance(sig, name: name) + // `@interface Name (Category)` / `@interface Name ()` and matching + // `@implementation` blocks extend an existing type rather than + // declaring a new one; report them as extensions instead. + if result.superclass == nil, result.conformances.isEmpty, + let baseType = categoryOrExtensionBaseType(sig) { + result.kind = .extension + result.isExtension = true + result.superclass = nil + result.conformances = [] + result.returnType = baseType + } + case .function, .initializer: + result.returnType = methodOrFunctionReturnType(sig) + default: + break + } + + return result + } + + // MARK: - Classification + + private func classify(_ sig: String, lspKind: Int) -> SymbolKind { + let trimmed = sig.trimmingCharacters(in: .whitespaces) + if trimmed.hasPrefix("@interface") || trimmed.hasPrefix("@implementation") { return .class } + if trimmed.hasPrefix("@protocol") { return .protocol } + if trimmed.hasPrefix("@property") { return .property } + if trimmed.hasPrefix("- ") || trimmed.hasPrefix("+ ") { + return trimmed.contains("init") && (trimmed.hasPrefix("- (instancetype)init") || trimmed.hasPrefix("- (nullable instancetype)init")) + ? .initializer + : .function + } + if trimmed.hasPrefix("typedef") { return .struct } + + // Fall back to the LSP's own classification (these numbers are the + // standard LSP `SymbolKind` values — same enum Swift's parser maps). + switch lspKind { + case 5, 23: return .class + case 6, 9, 12: return .function + case 7, 8, 13: return .property + case 10: return .enum + case 11: return .protocol + default: + if trimmed.hasPrefix("struct ") { return .struct } + if trimmed.hasPrefix("enum ") { return .enum } + if trimmed.hasPrefix("class ") { return .class } + return .unknown + } + } + + // MARK: - `@interface Name : Super ` / `@protocol Name ` + + private func parseInheritance(_ sig: String, name: String) -> (superclass: String?, conformances: [String]) { + guard let nameRange = sig.range(of: name) else { return (nil, []) } + var rest = sig[nameRange.upperBound...] + + var superclass: String? + if let colon = rest.firstIndex(of: ":") { + let beforeAngle = rest[rest.startIndex..") , angleStart < angleEnd { + let inner = rest[rest.index(after: angleStart).. String? { + let trimmed = sig.trimmingCharacters(in: .whitespacesAndNewlines) + let rest: Substring + let requiresNamedCategory: Bool + if trimmed.hasPrefix("@interface ") { + rest = trimmed.dropFirst("@interface ".count) + requiresNamedCategory = false + } else if trimmed.hasPrefix("@implementation ") { + rest = trimmed.dropFirst("@implementation ".count) + requiresNamedCategory = true + } else { + rest = Substring(trimmed) + requiresNamedCategory = false + } + + guard let open = rest.firstIndex(of: "("), + let close = rest[open...].firstIndex(of: ")") + else { return nil } + + let baseType = rest[.. String? { + let trimmed = sig.trimmingCharacters(in: .whitespaces) + if trimmed.hasPrefix("- ") || trimmed.hasPrefix("+ ") { + // Method: return type is the first parenthesized group. + guard let open = trimmed.firstIndex(of: "("), let close = trimmed.firstIndex(of: ")"), open < close else { return nil } + let type = trimmed[trimmed.index(after: open)..` passed through to it. We write ours to a +/// scratch directory and pass that path through (see `LSPClient`). +struct CompilationDatabaseWriter { + let directory: String + + /// Writes entries for every C-family file in `files`. Returns the + /// directory containing `compile_commands.json`, or `nil` if there were + /// no C-family files to register (nothing for clangd to do). + @discardableResult + func write(files: [(filePath: String, targetName: String, language: SourceLanguage)]) throws -> CompilationDatabaseContext? { + let shimDir = try ObjectiveCShimWriter(directory: directory + "/objc-shims").write() + let cFamilyFiles = files.filter { + if case .cFamily = $0.language { return true } + return false + } + let includeDirsByTarget = Dictionary(grouping: cFamilyFiles, by: \.targetName).mapValues { grouped in + Set(grouped.map { ($0.filePath as NSString).deletingLastPathComponent }).sorted() + } + + var entries: [[String: Any]] = [] + var parsePathByOriginalPath: [String: String] = [:] + for (filePath, _, language) in files { + guard case .cFamily = language else { continue } + let ext = (filePath as NSString).pathExtension + guard let driverLanguage = SourceLanguage.clangDriverLanguage(forPath: filePath, fallbackExtension: ext) else { continue } + let dir = (filePath as NSString).deletingLastPathComponent + let targetName = files.first(where: { $0.filePath == filePath })?.targetName ?? "" + let includeDirs = includeDirsByTarget[targetName] ?? [dir] + let compileFilePath = try syntheticCompilePath(for: filePath, driverLanguage: driverLanguage) + var arguments = [ + "clang", + "-x", driverLanguage, + "-fsyntax-only", + "-fobjc-arc", + "-fblocks", + "-std=gnu++20", + "-I", shimDir + ] + for includeDir in includeDirs { + arguments.append(contentsOf: ["-I", includeDir]) + } + arguments.append(compileFilePath) + entries.append([ + "directory": dir, + "file": compileFilePath, + "arguments": arguments + ]) + parsePathByOriginalPath[filePath] = compileFilePath + } + guard !entries.isEmpty else { return nil } + + try FileManager.default.createDirectory(atPath: directory, withIntermediateDirectories: true) + let dbPath = directory + "/compile_commands.json" + let data = try JSONSerialization.data(withJSONObject: entries, options: [.prettyPrinted]) + try data.write(to: URL(fileURLWithPath: dbPath)) + return CompilationDatabaseContext(directory: directory, parsePathByOriginalPath: parsePathByOriginalPath) + } + + private func syntheticCompilePath(for originalPath: String, driverLanguage: String) throws -> String { + let synthRoot = directory + "/synthetic-tu" + try FileManager.default.createDirectory(atPath: synthRoot, withIntermediateDirectories: true) + + let fileName = (originalPath as NSString).lastPathComponent + let baseName = (fileName as NSString).deletingPathExtension + let ext = syntheticExtension(for: driverLanguage) + let parsePath = synthRoot + "/" + baseName + "." + ext + + let content = try String(contentsOfFile: originalPath, encoding: .utf8) + try content.write(toFile: parsePath, atomically: true, encoding: .utf8) + return parsePath + } + + private func syntheticExtension(for driverLanguage: String) -> String { + switch driverLanguage { + case "objective-c++", "objective-c++-header": + return "mm" + case "objective-c", "objective-c-header": + return "m" + case "c++", "c++-header": + return "cc" + default: + return "c" + } + } +} diff --git a/Sources/CodeMapper/DeclarationParser.swift b/Sources/CodeMapper/DeclarationParser.swift new file mode 100644 index 0000000..50dfd8a --- /dev/null +++ b/Sources/CodeMapper/DeclarationParser.swift @@ -0,0 +1,180 @@ +import Foundation + +/// Parsed shape of a single declaration's signature, independent of source language. +struct ParsedDecl { + var kind: SymbolKind + var accessLevel: String + var isAsync: Bool + var isThrows: Bool + var isExtension: Bool + var returnType: String? + var superclass: String? + var conformances: [String] +} + +/// Language-specific declaration parsing. `SymbolExtractor` walks the LSP's +/// `documentSymbol` tree (which is language-agnostic) and delegates the two +/// genuinely language-specific steps to a `DeclarationParser`: +/// 1. slicing the raw signature text out of the source (where does a +/// declaration's "header" end — at `{`, at `;`, ...), and +/// 2. interpreting that text (kind, access level, async/throws, return +/// type, superclass/conformances). +/// Add a new conformance (alongside `SwiftDeclarationParser`/ +/// `CFamilyDeclarationParser`) to teach CodeMapper another source language. +protocol DeclarationParser { + func extractSignature(sourceBytes: [UInt8], startOffset: Int) -> String + func parse(_ sig: String, lspKind: Int, name: String) -> ParsedDecl +} + +extension DeclarationParser { + /// Shared text cleanup: collapse all whitespace/newlines in a raw byte + /// slice into a single-line, single-spaced signature string. + func collapseWhitespace(_ sourceBytes: ArraySlice) -> String { + let raw = String(bytes: sourceBytes, encoding: .utf8) ?? "" + return raw.components(separatedBy: .whitespacesAndNewlines) + .filter { !$0.isEmpty } + .joined(separator: " ") + .trimmingCharacters(in: .whitespaces) + } +} + +// MARK: - Swift + +struct SwiftDeclarationParser: DeclarationParser { + func extractSignature(sourceBytes: [UInt8], startOffset: Int) -> String { + var parenDepth = 0 + var bracketDepth = 0 + var i = startOffset + while i < sourceBytes.count { + let b = sourceBytes[i] + if b == UInt8(ascii: "{") && parenDepth == 0 && bracketDepth == 0 { break } + // Stop at newline for protocol requirements / computed property stubs + if b == UInt8(ascii: "\n") && parenDepth == 0 && bracketDepth == 0 { + // Check if next non-whitespace is `{` — if not, this is a one-liner with no body + var j = i + 1 + while j < sourceBytes.count && (sourceBytes[j] == 0x20 || sourceBytes[j] == 0x09) { j += 1 } + if j < sourceBytes.count && sourceBytes[j] != UInt8(ascii: "{") { break } + } + switch b { + case UInt8(ascii: "("): parenDepth += 1 + case UInt8(ascii: ")"): if parenDepth > 0 { parenDepth -= 1 } + case UInt8(ascii: "["): bracketDepth += 1 + case UInt8(ascii: "]"): if bracketDepth > 0 { bracketDepth -= 1 } + default: break + } + i += 1 + } + return collapseWhitespace(sourceBytes[startOffset.. ParsedDecl { + var result = ParsedDecl( + kind: lspKindToSymbolKind(lspKind, sig: sig), + accessLevel: "", + isAsync: false, + isThrows: false, + isExtension: sig.hasPrefix("extension ") || sig.contains(" extension "), + returnType: nil, + superclass: nil, + conformances: [] + ) + + if sig.contains("public ") || sig.contains("open ") { + result.accessLevel = "pub" + } else if sig.contains("private ") || sig.contains("fileprivate ") { + result.accessLevel = "priv" + } + + result.isAsync = sig.contains(" async") || sig.contains(" async\n") + result.isThrows = sig.contains(" throws") || sig.contains(" rethrows") + + // Return type — after last `->` + if let arrowRange = sig.range(of: "->", options: .backwards) { + let candidate = String(sig[arrowRange.upperBound...]).trimmingCharacters(in: .whitespaces) + if !candidate.isEmpty { result.returnType = candidate } + } + + // Inheritance / conformances — after `:` in type/extension declarations + if result.kind == .class || result.kind == .struct || result.kind == .enum || + result.kind == .actor || result.kind == .protocol || result.isExtension { + if let colonRange = findColon(in: sig, after: name) { + let inherited = String(sig[sig.index(after: colonRange)...]) + .trimmingCharacters(in: .whitespaces) + let parts = inherited.components(separatedBy: ",") + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty && !$0.contains(" where ") } + .map { stripGenericConstraints($0) } + .filter { !$0.isEmpty } + + if result.kind == .class && !parts.isEmpty { + // Heuristic: first item is superclass for class declarations + let first = parts[0] + result.superclass = first + result.conformances = parts.count > 1 ? Array(parts[1...]) : [] + } else { + result.conformances = parts + } + } + } + + return result + } + + private func lspKindToSymbolKind(_ kind: Int, sig: String) -> SymbolKind { + if sig.hasPrefix("extension ") || sig.contains(" extension ") { return .extension } + switch kind { + case 5: // Class — also used for actors sometimes + if sig.contains("actor ") { return .actor } + return .class + case 6, 12: return .function // Method, Function + case 7, 8: return .property // Property, Field + case 9: return .initializer + case 10: return .enum + case 11: return .protocol // Interface + case 13: return .property // Variable + case 23: return .struct + default: + if sig.contains("actor ") { return .actor } + if sig.contains("struct ") { return .struct } + if sig.contains("class ") { return .class } + if sig.contains("enum ") { return .enum } + if sig.contains("protocol ") { return .protocol } + if sig.contains("func ") { return .function } + if sig.contains("init(") || sig.contains("init<") { return .initializer } + return .unknown + } + } + + private func findColon(in sig: String, after name: String) -> String.Index? { + // Find `:` that comes after the type name, outside angle brackets + var depth = 0 + var i = sig.startIndex + + if let nameRange = sig.range(of: name) { + i = nameRange.upperBound + } + + while i < sig.endIndex { + let c = sig[i] + switch c { + case "<": depth += 1 + case ">": if depth > 0 { depth -= 1 } + case ":": + if depth == 0 { return i } + case "(": + // Once we're inside parameter list, no more inheritance + return nil + default: break + } + i = sig.index(after: i) + } + return nil + } + + private func stripGenericConstraints(_ s: String) -> String { + if let whereRange = s.range(of: " where ") { + return String(s[.. [[String: Any]] { + diagnosticsByURI.removeValue(forKey: uri) ?? [] + } + private func sendNotification(_ method: String, params: Any) throws { let msg: [String: Any] = [ "jsonrpc": "2.0", diff --git a/Sources/CodeMapper/ObjectiveCShimWriter.swift b/Sources/CodeMapper/ObjectiveCShimWriter.swift new file mode 100644 index 0000000..ffba0dd --- /dev/null +++ b/Sources/CodeMapper/ObjectiveCShimWriter.swift @@ -0,0 +1,146 @@ +import Foundation + +/// Writes a minimal Foundation shim for mechanical Objective-C parsing on hosts +/// that do not have Apple's SDKs installed. The goal is not semantic fidelity +/// or successful compilation; it is just enough surface area for clangd to +/// continue lexing/parsing public headers and emit document symbols instead of +/// collapsing into recovery mode at `#import `. +struct ObjectiveCShimWriter { + let directory: String + + func write() throws -> String { + let foundationDir = directory + "/Foundation" + let objcDir = directory + "/objc" + try FileManager.default.createDirectory(atPath: foundationDir, withIntermediateDirectories: true) + try FileManager.default.createDirectory(atPath: objcDir, withIntermediateDirectories: true) + + let header = """ + #pragma once + + #include + #include + + #ifndef __OBJC__ + #define __OBJC__ 1 + #endif + + typedef signed char BOOL; + #ifndef YES + #define YES ((BOOL)1) + #endif + #ifndef NO + #define NO ((BOOL)0) + #endif + + #ifndef nil + #define nil ((id)0) + #endif + #ifndef Nil + #define Nil ((Class)0) + #endif + + typedef struct objc_object *id; + typedef struct objc_class *Class; + typedef struct objc_selector *SEL; + typedef struct objc_protocol *Protocol; + + struct objc_object {}; + struct objc_class {}; + struct objc_selector {}; + struct objc_protocol {}; + + @protocol NSObject + @end + + @interface NSObject + @end + + @interface NSString : NSObject + @end + + @interface NSError : NSObject + @end + + @interface NSNumber : NSObject + @end + + @interface NSData : NSObject + @end + + @interface NSMutableData : NSData + @end + + @interface NSArray : NSObject + @end + + @interface NSMutableArray : NSArray + @end + + @interface NSDictionary : NSObject + @end + + @interface NSMutableDictionary : NSDictionary + @end + + @interface NSSet : NSObject + @end + + @interface NSMutableSet : NSSet + @end + + #ifndef NS_ASSUME_NONNULL_BEGIN + #define NS_ASSUME_NONNULL_BEGIN + #endif + #ifndef NS_ASSUME_NONNULL_END + #define NS_ASSUME_NONNULL_END + #endif + #ifndef NS_DESIGNATED_INITIALIZER + #define NS_DESIGNATED_INITIALIZER + #endif + #ifndef NS_UNAVAILABLE + #define NS_UNAVAILABLE __attribute__((unavailable)) + #endif + #ifndef NS_SWIFT_NAME + #define NS_SWIFT_NAME(_name) + #endif + #ifndef NS_ENUM + #define NS_ENUM(_type, _name) enum _name : _type; enum _name : _type + #endif + #ifndef NS_OPTIONS + #define NS_OPTIONS(_type, _name) enum _name : _type; enum _name : _type + #endif + #ifndef CF_ASSUME_NONNULL_BEGIN + #define CF_ASSUME_NONNULL_BEGIN + #endif + #ifndef CF_ASSUME_NONNULL_END + #define CF_ASSUME_NONNULL_END + #endif + """ + + let path = foundationDir + "/Foundation.h" + try header.write(toFile: path, atomically: true, encoding: .utf8) + + let objcHeader = """ + #pragma once + + typedef struct objc_object *id; + typedef struct objc_class *Class; + typedef struct objc_selector *SEL; + typedef struct objc_protocol *Protocol; + + struct objc_object {}; + struct objc_class {}; + struct objc_selector {}; + struct objc_protocol {}; + """ + try objcHeader.write(toFile: objcDir + "/objc.h", atomically: true, encoding: .utf8) + + let blocksHeader = """ + #pragma once + typedef void *Block; + """ + try blocksHeader.write(toFile: objcDir + "/blocks_runtime.h", atomically: true, encoding: .utf8) + + return directory + } +} diff --git a/Sources/CodeMapper/OutputWriter.swift b/Sources/CodeMapper/OutputWriter.swift index 3318224..6797895 100644 --- a/Sources/CodeMapper/OutputWriter.swift +++ b/Sources/CodeMapper/OutputWriter.swift @@ -6,11 +6,11 @@ struct OutputWriter { var includeCalls: Bool = true var outgoingOnly: Bool = false var pathFilter: String? = nil + var excludePaths: [String] = [] func printAll() throws { let allFiles = symbolTable.fileTargets.keys.sorted().filter { filePath in - guard let filter = pathFilter else { return true } - return filePath == filter || filePath.hasPrefix(filter + "/") + pathIsIncluded(filePath, includePath: pathFilter, excludePaths: excludePaths) } for filePath in allFiles { Swift.print(buildOutput(for: filePath)) @@ -19,8 +19,7 @@ struct OutputWriter { func writeAll() throws { let allFiles = symbolTable.fileTargets.keys.sorted().filter { filePath in - guard let filter = pathFilter else { return true } - return filePath == filter || filePath.hasPrefix(filter + "/") + pathIsIncluded(filePath, includePath: pathFilter, excludePaths: excludePaths) } for filePath in allFiles { let output = buildOutput(for: filePath) diff --git a/Sources/CodeMapper/PathFilter.swift b/Sources/CodeMapper/PathFilter.swift new file mode 100644 index 0000000..c7df6d2 --- /dev/null +++ b/Sources/CodeMapper/PathFilter.swift @@ -0,0 +1,17 @@ +import Foundation + +func pathIsIncluded(_ filePath: String, includePath: String?, excludePaths: [String]) -> Bool { + if let includePath, !pathMatchesFilter(filePath, filterPath: includePath) { + return false + } + + for excluded in excludePaths where pathMatchesFilter(filePath, filterPath: excluded) { + return false + } + + return true +} + +private func pathMatchesFilter(_ filePath: String, filterPath: String) -> Bool { + filePath == filterPath || filePath.hasPrefix(filterPath + "/") +} diff --git a/Sources/CodeMapper/ShapeSnapshot.swift b/Sources/CodeMapper/ShapeSnapshot.swift new file mode 100644 index 0000000..93131cc --- /dev/null +++ b/Sources/CodeMapper/ShapeSnapshot.swift @@ -0,0 +1,609 @@ +import Foundation + +struct ShapeSnapshot: Codable { + let root: String + let files: [String] + let types: [TypeShape] + + static func build(symbolTable: SymbolTable, packageRoot: String, pathFilter: String?, excludePaths: [String]) -> ShapeSnapshot { + let filteredFiles = symbolTable.fileTargets.keys.sorted().filter { filePath in + pathIsIncluded(filePath, includePath: pathFilter, excludePaths: excludePaths) + } + let relevantSymbols = filteredFiles.flatMap { symbolTable.fileSymbols[$0] ?? [] } + let relevantExtensions = filteredFiles.flatMap { symbolTable.fileExtensions[$0] ?? [] } + + var ownerNames = Set() + for sym in relevantSymbols where sym.typeName.isEmpty && !sym.kind.isMemberKind { + ownerNames.insert(sym.name) + } + for ext in relevantExtensions { + ownerNames.insert(ext.baseType) + } + + let types = ownerNames.sorted().map { ownerName -> TypeShape in + let declarations = relevantSymbols + .filter { $0.typeName.isEmpty && $0.name == ownerName && !$0.kind.isMemberKind } + .sorted { lhs, rhs in + if lhs.filePath != rhs.filePath { return lhs.filePath < rhs.filePath } + if lhs.selectionLine != rhs.selectionLine { return lhs.selectionLine < rhs.selectionLine } + return lhs.selectionChar < rhs.selectionChar + } + let members = relevantSymbols.filter { $0.typeName == ownerName } + let extensions = relevantExtensions.filter { $0.baseType == ownerName } + + let kind = declarations.first?.kind.rawValue + ?? (extensions.isEmpty ? SymbolKind.unknown.rawValue : SymbolKind.extension.rawValue) + let superclass = declarations.lazy.compactMap(\.superclass).first + + var conformances = Set() + for decl in declarations { + decl.conformances.forEach { conformances.insert($0) } + } + for ext in extensions { + ext.conformances.forEach { conformances.insert($0) } + } + + let nestedTypes = members + .filter { !$0.kind.isMemberKind } + .map { "\($0.kind.rawValue):\($0.name)" } + .sorted() + + let properties = members + .filter { $0.kind == .property } + .map(normalizedPropertyKey) + .sorted() + + let methods = members + .filter { $0.kind == .function } + .map(normalizedCallableKey) + .sorted() + + let initializers = members + .filter { $0.kind == .initializer } + .map(normalizedCallableKey) + .sorted() + + return TypeShape( + name: ownerName, + kind: kind, + superclass: superclass, + conformances: conformances.sorted(), + nestedTypes: nestedTypes, + properties: properties, + methods: methods, + initializers: initializers + ) + } + + let relFiles = filteredFiles.map { filePath in + filePath.hasPrefix(packageRoot + "/") + ? String(filePath.dropFirst(packageRoot.count + 1)) + : filePath + } + + return ShapeSnapshot(root: packageRoot, files: relFiles, types: types) + } +} + +struct TypeShape: Codable { + let name: String + let kind: String + let superclass: String? + let conformances: [String] + let nestedTypes: [String] + let properties: [String] + let methods: [String] + let initializers: [String] +} + +struct ShapeDiff { + let baselineOnlyTypes: [String] + let candidateOnlyTypes: [String] + let changedTypes: [TypeShapeDiff] + + var hasDifferences: Bool { + !baselineOnlyTypes.isEmpty || !candidateOnlyTypes.isEmpty || !changedTypes.isEmpty + } + + static func compare(baseline: ShapeSnapshot, candidate: ShapeSnapshot) -> ShapeDiff { + let baselineByName = Dictionary(uniqueKeysWithValues: baseline.types.map { ($0.name, $0) }) + let candidateByName = Dictionary(uniqueKeysWithValues: candidate.types.map { ($0.name, $0) }) + + let baselineNames = Set(baselineByName.keys) + let candidateNames = Set(candidateByName.keys) + + let baselineOnlyTypes = baselineNames.subtracting(candidateNames).sorted() + let candidateOnlyTypes = candidateNames.subtracting(baselineNames).sorted() + + let changedTypes: [TypeShapeDiff] = baselineNames.intersection(candidateNames).sorted().compactMap { name -> TypeShapeDiff? in + guard let baselineType = baselineByName[name], let candidateType = candidateByName[name] else { return nil } + return TypeShapeDiff.compare(name: name, baseline: baselineType, candidate: candidateType) + } + + return ShapeDiff( + baselineOnlyTypes: baselineOnlyTypes, + candidateOnlyTypes: candidateOnlyTypes, + changedTypes: changedTypes + ) + } + + func render(baselineLabel: String, candidateLabel: String) -> String { + var lines: [String] = [] + lines.append("Shape comparison") + lines.append("baseline: \(baselineLabel)") + lines.append("candidate: \(candidateLabel)") + + if !hasDifferences { + lines.append("") + lines.append("No shape differences found.") + return lines.joined(separator: "\n") + "\n" + } + + if !baselineOnlyTypes.isEmpty { + lines.append("") + lines.append("Only in baseline:") + for name in baselineOnlyTypes { + lines.append(" - \(name)") + } + } + + if !candidateOnlyTypes.isEmpty { + lines.append("") + lines.append("Only in candidate:") + for name in candidateOnlyTypes { + lines.append(" - \(name)") + } + } + + if !changedTypes.isEmpty { + lines.append("") + lines.append("Changed types:") + for diff in changedTypes { + lines.append(" \(diff.name)") + lines.append(contentsOf: diff.renderDetails().map { " " + $0 }) + } + } + + return lines.joined(separator: "\n") + "\n" + } +} + +struct TypeShapeDiff { + let name: String + let kindMismatch: (String, String)? + let superclassMismatch: (String?, String?)? + let baselineOnlyConformances: [String] + let candidateOnlyConformances: [String] + let baselineOnlyNestedTypes: [String] + let candidateOnlyNestedTypes: [String] + let baselineOnlyProperties: [String] + let candidateOnlyProperties: [String] + let baselineOnlyMethods: [String] + let candidateOnlyMethods: [String] + let baselineOnlyInitializers: [String] + let candidateOnlyInitializers: [String] + + var hasDifferences: Bool { + kindMismatch != nil || + superclassMismatch != nil || + !baselineOnlyConformances.isEmpty || + !candidateOnlyConformances.isEmpty || + !baselineOnlyNestedTypes.isEmpty || + !candidateOnlyNestedTypes.isEmpty || + !baselineOnlyProperties.isEmpty || + !candidateOnlyProperties.isEmpty || + !baselineOnlyMethods.isEmpty || + !candidateOnlyMethods.isEmpty || + !baselineOnlyInitializers.isEmpty || + !candidateOnlyInitializers.isEmpty + } + + static func compare(name: String, baseline: TypeShape, candidate: TypeShape) -> TypeShapeDiff? { + let diff = TypeShapeDiff( + name: name, + kindMismatch: baseline.kind == candidate.kind ? nil : (baseline.kind, candidate.kind), + superclassMismatch: baseline.superclass == candidate.superclass ? nil : (baseline.superclass, candidate.superclass), + baselineOnlyConformances: Set(baseline.conformances).subtracting(candidate.conformances).sorted(), + candidateOnlyConformances: Set(candidate.conformances).subtracting(baseline.conformances).sorted(), + baselineOnlyNestedTypes: Set(baseline.nestedTypes).subtracting(candidate.nestedTypes).sorted(), + candidateOnlyNestedTypes: Set(candidate.nestedTypes).subtracting(baseline.nestedTypes).sorted(), + baselineOnlyProperties: Set(baseline.properties).subtracting(candidate.properties).sorted(), + candidateOnlyProperties: Set(candidate.properties).subtracting(baseline.properties).sorted(), + baselineOnlyMethods: Set(baseline.methods).subtracting(candidate.methods).sorted(), + candidateOnlyMethods: Set(candidate.methods).subtracting(baseline.methods).sorted(), + baselineOnlyInitializers: Set(baseline.initializers).subtracting(candidate.initializers).sorted(), + candidateOnlyInitializers: Set(candidate.initializers).subtracting(baseline.initializers).sorted() + ) + return diff.hasDifferences ? diff : nil + } + + func renderDetails() -> [String] { + var lines: [String] = [] + if let kindMismatch { + lines.append("kind: \(kindMismatch.0) -> \(kindMismatch.1)") + } + if let superclassMismatch { + lines.append("superclass: \(superclassMismatch.0 ?? "nil") -> \(superclassMismatch.1 ?? "nil")") + } + if !baselineOnlyConformances.isEmpty { + lines.append("missing conformances: \(baselineOnlyConformances.joined(separator: ", "))") + } + if !candidateOnlyConformances.isEmpty { + lines.append("extra conformances: \(candidateOnlyConformances.joined(separator: ", "))") + } + if !baselineOnlyNestedTypes.isEmpty { + lines.append("missing nested types: \(baselineOnlyNestedTypes.joined(separator: ", "))") + } + if !candidateOnlyNestedTypes.isEmpty { + lines.append("extra nested types: \(candidateOnlyNestedTypes.joined(separator: ", "))") + } + if !baselineOnlyProperties.isEmpty { + lines.append("missing properties: \(baselineOnlyProperties.joined(separator: ", "))") + } + if !candidateOnlyProperties.isEmpty { + lines.append("extra properties: \(candidateOnlyProperties.joined(separator: ", "))") + } + if !baselineOnlyMethods.isEmpty { + lines.append("missing methods: \(baselineOnlyMethods.joined(separator: ", "))") + } + if !candidateOnlyMethods.isEmpty { + lines.append("extra methods: \(candidateOnlyMethods.joined(separator: ", "))") + } + if !baselineOnlyInitializers.isEmpty { + lines.append("missing initializers: \(baselineOnlyInitializers.joined(separator: ", "))") + } + if !candidateOnlyInitializers.isEmpty { + lines.append("extra initializers: \(candidateOnlyInitializers.joined(separator: ", "))") + } + return lines + } +} + +private extension SymbolKind { + var isMemberKind: Bool { + self == .property || self == .function || self == .initializer + } +} + +private func normalizedPropertyKey(_ sym: SymbolInfo) -> String { + let name = canonicalIdentifier(sym.name) + if let returnType = sym.returnType, !returnType.isEmpty { + return "\(name):\(normalizeTypeString(returnType))" + } + return name +} + +private func normalizedCallableKey(_ sym: SymbolInfo) -> String { + switch detectSignatureLanguage(sym.signature) { + case .swift: + return normalizedSwiftCallableKey(sym) + case .objectiveC: + return normalizedObjCCallableKey(sym) + case .cLike: + return normalizedCLikeCallableKey(sym) + } +} + +private enum SignatureLanguage { + case swift + case objectiveC + case cLike +} + +private func detectSignatureLanguage(_ signature: String) -> SignatureLanguage { + if signature.contains("func ") || signature.contains("init(") || signature.contains("init<") { + return .swift + } + if signature.hasPrefix("- ") || signature.hasPrefix("+ ") || signature.contains("@property") { + return .objectiveC + } + return .cLike +} + +private func normalizedSwiftCallableKey(_ sym: SymbolInfo) -> String { + let signature = sym.signature.trimmingCharacters(in: .whitespacesAndNewlines) + let prefix = sym.kind == .initializer ? "init" : canonicalSwiftBaseName(sym.name) + guard let open = signature.firstIndex(of: "("), + let close = matchingParen(in: signature, open: open) + else { + return prefix + } + let params = String(signature[signature.index(after: open).. String in + let part = raw.trimmingCharacters(in: .whitespacesAndNewlines) + guard let colon = part.firstIndex(of: ":") else { return "_" } + let labelPart = part[.. String { + let signature = sym.signature.trimmingCharacters(in: .whitespacesAndNewlines) + guard let close = signature.firstIndex(of: ")") else { return sym.name } + let tail = signature[signature.index(after: close)...].trimmingCharacters(in: .whitespacesAndNewlines) + if !tail.contains(":") { + let name = tail.split(whereSeparator: \.isWhitespace).first.map(String.init) ?? sym.name + return name + } + + let components = splitObjectiveCSelector(tail) + if components.isEmpty { + return canonicalObjectiveCZeroArgName(sym.name) + } + return canonicalObjectiveCCallableKey(components, kind: sym.kind) +} + +private func normalizedCLikeCallableKey(_ sym: SymbolInfo) -> String { + let signature = sym.signature.trimmingCharacters(in: .whitespacesAndNewlines) + guard let open = signature.firstIndex(of: "(") else { return sym.name } + let beforeParen = signature[.. [String] { + var result: [String] = [] + var segment = "" + var parenDepth = 0 + var angleDepth = 0 + + for ch in tail { + switch ch { + case "(": + parenDepth += 1 + case ")": + if parenDepth > 0 { parenDepth -= 1 } + case "<": + angleDepth += 1 + case ">": + if angleDepth > 0 { angleDepth -= 1 } + case ":": + if parenDepth == 0 && angleDepth == 0 { + if let label = lastIdentifier(in: segment) { + result.append(label) + } + segment = "" + } + default: + segment.append(ch) + } + } + + return result +} + +private func splitTopLevel(_ text: String, separator: Character) -> [String] { + var parts: [String] = [] + var current = "" + var parenDepth = 0 + var angleDepth = 0 + var bracketDepth = 0 + + for ch in text { + switch ch { + case "(": + parenDepth += 1 + case ")": + if parenDepth > 0 { parenDepth -= 1 } + case "<": + angleDepth += 1 + case ">": + if angleDepth > 0 { angleDepth -= 1 } + case "[": + bracketDepth += 1 + case "]": + if bracketDepth > 0 { bracketDepth -= 1 } + default: + break + } + + if ch == separator && parenDepth == 0 && angleDepth == 0 && bracketDepth == 0 { + parts.append(current) + current = "" + } else { + current.append(ch) + } + } + + if !current.isEmpty { + parts.append(current) + } + return parts +} + +private func matchingParen(in text: String, open: String.Index) -> String.Index? { + var depth = 0 + var index = open + while index < text.endIndex { + let ch = text[index] + if ch == "(" { + depth += 1 + } else if ch == ")" { + depth -= 1 + if depth == 0 { return index } + } + index = text.index(after: index) + } + return nil +} + +private func normalizeTypeString(_ type: String) -> String { + type + .replacingOccurrences(of: " ", with: "") + .replacingOccurrences(of: "?", with: "?") +} + +private func lastIdentifier(in text: String) -> String? { + let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return nil } + + var end = trimmed.endIndex + while end > trimmed.startIndex { + let prev = trimmed.index(before: end) + let scalar = trimmed[prev] + if scalar.isLetter || scalar.isNumber || scalar == "_" { + break + } + end = prev + } + guard end > trimmed.startIndex else { return nil } + + var start = end + while start > trimmed.startIndex { + let prev = trimmed.index(before: start) + let scalar = trimmed[prev] + if scalar.isLetter || scalar.isNumber || scalar == "_" { + start = prev + } else { + break + } + } + + let identifier = String(trimmed[start.. String { + if let open = name.firstIndex(of: "(") { + return canonicalIdentifier(String(name[.. String { + let canonical = canonicalIdentifier(name) + if canonical.hasPrefix("get"), canonical.count > 3, let dropped = dropGetterPrefix(canonical) { + return dropped + "()" + } + return canonical + "()" +} + +private func canonicalObjectiveCCallableKey(_ rawComponents: [String], kind: SymbolKind) -> String { + var components = rawComponents.map(normalizeObjectiveCSelectorComponent) + if let last = components.last, last == "error" { + components.removeLast() + } + + guard let first = components.first else { + return kind == .initializer ? "init()" : canonicalIdentifier(rawComponents.first ?? "") + } + + if kind == .initializer { + return canonicalObjectiveCInitializerKey(components) + } + + if components.count == 1 { + let single = first + if single.hasSuffix("WithError"), let base = single.dropSuffix("WithError") { + if let getter = dropGetterPrefix(base) { + return getter + "()" + } + return base + "()" + } + if single.hasPrefix("set"), single.count > 3 { + return single + "(_:)" + } + return single + "(_:)" + } + + if let (base, firstLabel) = splitWithLabel(first) { + let labels = [firstLabel] + Array(components.dropFirst()) + return base + "(" + labels.map { "\($0):" }.joined() + ")" + } + + return first + "(" + Array(components.dropFirst()).map { "\($0):" }.joined() + ")" +} + +private func canonicalObjectiveCInitializerKey(_ components: [String]) -> String { + guard let first = components.first else { return "init()" } + if first == "init" && components.count == 1 { + return "init()" + } + if let suffix = first.dropPrefix("initWith"), !suffix.isEmpty { + let firstLabel = lowercasedFirst(suffix) + let remaining = [firstLabel] + Array(components.dropFirst()) + return "init(" + remaining.map { "\($0):" }.joined() + ")" + } + if first == "init" { + return "init(" + Array(components.dropFirst()).map { "\($0):" }.joined() + ")" + } + return "init(" + components.map { "\($0):" }.joined() + ")" +} + +private func splitWithLabel(_ value: String) -> (String, String)? { + guard let range = value.range(of: "With"), + range.lowerBound != value.startIndex, + range.upperBound != value.endIndex + else { return nil } + + let base = String(value[.. String? { + guard let suffix = value.dropPrefix("get"), !suffix.isEmpty else { return nil } + return lowercasedFirst(suffix) +} + +private func canonicalIdentifier(_ value: String) -> String { + let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return trimmed } + if trimmed.contains("_") { + let parts = trimmed.split(separator: "_").map(String.init) + guard let first = parts.first else { return trimmed } + let rest = parts.dropFirst().map { uppercasedFirst($0) } + return ([lowercasedFirst(first)] + rest).joined() + } + return trimmed +} + +private func normalizeObjectiveCSelectorComponent(_ value: String) -> String { + let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return trimmed } + + var candidate = trimmed + if let close = candidate.lastIndex(of: ")"), close < candidate.index(before: candidate.endIndex) { + candidate = String(candidate[candidate.index(after: close)...]).trimmingCharacters(in: .whitespacesAndNewlines) + } + if candidate.contains(" ") { + candidate = candidate.split(whereSeparator: \.isWhitespace).last.map(String.init) ?? candidate + } + return canonicalIdentifier(candidate) +} + +private func lowercasedFirst(_ value: String) -> String { + guard let first = value.first else { return value } + return String(first).lowercased() + value.dropFirst() +} + +private func uppercasedFirst(_ value: String) -> String { + guard let first = value.first else { return value } + return String(first).uppercased() + value.dropFirst() +} + +private extension String { + func dropPrefix(_ prefix: String) -> String? { + guard hasPrefix(prefix) else { return nil } + return String(dropFirst(prefix.count)) + } + + func dropSuffix(_ suffix: String) -> String? { + guard hasSuffix(suffix) else { return nil } + return String(dropLast(suffix.count)) + } +} diff --git a/Sources/CodeMapper/SourceLanguage.swift b/Sources/CodeMapper/SourceLanguage.swift new file mode 100644 index 0000000..9c1d51b --- /dev/null +++ b/Sources/CodeMapper/SourceLanguage.swift @@ -0,0 +1,78 @@ +import Foundation + +/// Source language family, derived from file extension. Drives both which +/// signature-parsing pass `SymbolExtractor` runs and which LSP `languageId` +/// is reported on `textDocument/didOpen` (so sourcekit-lsp routes the file +/// to the right backend — SourceKit for Swift, clangd for C-family). +enum SourceLanguage { + case swift + case cFamily(languageId: String) + + var lspLanguageId: String { + switch self { + case .swift: return "swift" + case .cFamily(let languageId): return languageId + } + } + + /// Recognized source extensions, mapped to their language. Extend this + /// set to teach CodeMapper about additional source kinds. + static let extensionLanguageIds: [String: String] = [ + "swift": "swift", + "h": "objective-c", + "m": "objective-c", + "mm": "objective-cpp", + "c": "c", + "cc": "cpp", + "cpp": "cpp", + "cxx": "cpp", + "hpp": "cpp", + ] + + init?(pathExtension: String) { + guard let languageId = SourceLanguage.extensionLanguageIds[pathExtension.lowercased()] else { return nil } + self = languageId == "swift" ? .swift : .cFamily(languageId: languageId) + } + + /// Maps a file extension to the clang driver `-x` value clangd needs in + /// a compilation database to know how to parse it. Bare `.h` files are + /// inherently ambiguous (C? C++? Objective-C?) — without an explicit `-x`, + /// clangd falls back to plain C, which can't parse `@interface`/`@protocol` + /// and silently yields zero symbols for Objective-C headers. + static func clangDriverLanguage(forExtension ext: String) -> String? { + switch ext.lowercased() { + case "h": return "objective-c-header" + case "hpp": return "c++-header" + case "m": return "objective-c" + case "mm": return "objective-c++" + case "c": return "c" + case "cc", "cpp", "cxx": return "c++" + default: return nil + } + } + + /// `.h` is ambiguous. When the header text clearly uses C++ surface, drive + /// clangd as Objective-C++ header so templates/namespaces/std:: types parse. + static func clangDriverLanguage(forPath path: String, fallbackExtension ext: String) -> String? { + let fallback = clangDriverLanguage(forExtension: ext) + guard ext.lowercased() == "h", + let content = try? String(contentsOfFile: path, encoding: .utf8) + else { return fallback } + + let cxxMarkers = [ + "namespace ", + "std::", + "template<", + "template <", + "optional<", + "vector<", + "string>", + "onnxruntime_cxx", + "Ort::" + ] + if cxxMarkers.contains(where: { content.contains($0) }) { + return "objective-c++-header" + } + return fallback + } +} diff --git a/Sources/CodeMapper/SourceWalker.swift b/Sources/CodeMapper/SourceWalker.swift index 81fcaed..0facc59 100644 --- a/Sources/CodeMapper/SourceWalker.swift +++ b/Sources/CodeMapper/SourceWalker.swift @@ -51,10 +51,10 @@ struct SourceWalker { return targets } - func discoverFiles(symbolTable: SymbolTable) -> [(filePath: String, targetName: String)] { + func discoverFiles(symbolTable: SymbolTable) -> [(filePath: String, targetName: String, language: SourceLanguage)] { let targets = discoverTargets() - var results: [(String, String)] = [] + var results: [(String, String, SourceLanguage)] = [] let fm = FileManager.default for target in targets { @@ -66,11 +66,11 @@ struct SourceWalker { ) else { continue } for case let url as URL in enumerator { - guard url.pathExtension == "swift" else { continue } + guard let language = SourceLanguage(pathExtension: url.pathExtension) else { continue } let path = url.path guard !path.contains("/.build/") else { continue } symbolTable.fileTargets[path] = target.name - results.append((path, target.name)) + results.append((path, target.name, language)) } } @@ -84,10 +84,10 @@ struct SourceWalker { ) else { return [] } for case let url as URL in enumerator { - guard url.pathExtension == "swift" else { continue } + guard let language = SourceLanguage(pathExtension: url.pathExtension) else { continue } let path = url.path symbolTable.fileTargets[path] = filter - results.append((path, filter)) + results.append((path, filter, language)) } } diff --git a/Sources/CodeMapper/SymbolExtractor.swift b/Sources/CodeMapper/SymbolExtractor.swift index cf62aff..350e714 100644 --- a/Sources/CodeMapper/SymbolExtractor.swift +++ b/Sources/CodeMapper/SymbolExtractor.swift @@ -4,28 +4,90 @@ struct SymbolExtractor { let lsp: LSPClient let symbolTable: SymbolTable - func process(filePath: String, targetName: String) throws { - let uri = "file://" + filePath + func process(filePath: String, targetName: String, language: SourceLanguage = .swift, parsePath: String? = nil) throws { + let lspPath = parsePath ?? filePath + let uri = "file://" + lspPath guard let content = try? String(contentsOfFile: filePath, encoding: .utf8) else { return } let sourceBytes = Array(content.utf8) let lineOffsets = buildLineOffsets(content) + let parser = declarationParser(for: language) - try lsp.openFile(uri: uri, content: content) + try lsp.openFile(uri: uri, content: content, languageId: language.lspLanguageId) defer { try? lsp.closeFile(uri: uri) } let rawSymbols = try lsp.documentSymbol(uri: uri) - flatten(rawSymbols, parent: nil, filePath: filePath, targetName: targetName, - sourceBytes: sourceBytes, lineOffsets: lineOffsets) + reportFatalDiagnostics(uri: uri, filePath: filePath) + _ = flatten(rawSymbols, parent: nil, containerKind: nil, filePath: filePath, targetName: targetName, + sourceBytes: sourceBytes, lineOffsets: lineOffsets, parser: parser, language: language) + warnIfLikelyMisparsed(content: content, filePath: filePath, language: language) + } + + /// Heuristic fallback for when the LSP silently mis-parses a C-family file + /// without surfacing any diagnostic — confirmed to happen with sourcekit-lsp's + /// clangd multiplexing (e.g. a fatal "header not found" preprocessor error + /// makes clangd fall into recovery mode: `documentSymbol` returns *garbage* + /// fragments — stray identifiers misclassified as properties/methods — + /// rather than the real `@interface`/`@protocol` declarations, and zero + /// `publishDiagnostics` notifications are sent; verified empirically that + /// even outright broken syntax produces no diagnostic). A simple "zero + /// symbols" check misses this because the recovery-mode fragments do get + /// registered. Instead, compare what the source *textually* declares + /// against what actually landed as a top-level type/extension — a + /// significant gap is a strong signal of a silent parse failure. + private func warnIfLikelyMisparsed(content: String, filePath: String, language: SourceLanguage) { + guard case .cFamily = language else { return } + + let declaredCount = content.components(separatedBy: "\n").filter { + let trimmed = $0.trimmingCharacters(in: .whitespaces) + return trimmed.hasPrefix("@interface") || trimmed.hasPrefix("@protocol") || trimmed.hasPrefix("@implementation") + }.count + guard declaredCount > 0 else { return } + + let registeredTopLevelTypes = (symbolTable.fileSymbols[filePath] ?? []).filter { + $0.typeName.isEmpty && [.class, .protocol, .struct, .enum].contains($0.kind) + }.count + let registeredExtensions = (symbolTable.fileExtensions[filePath] ?? []).count + + guard registeredTopLevelTypes + registeredExtensions == 0 else { return } + fputs("Warning: \(filePath): source textually declares \(declaredCount) @interface/@protocol/@implementation block(s) but the LSP produced none of them as usable symbols — likely a silent parse failure (e.g. an unresolvable #import) that produced no diagnostic\n", stderr) + } + + /// Surfaces error-severity diagnostics (e.g. missing-header preprocessor + /// errors) on stderr. Without this, a fatal parse error silently produces + /// an empty symbol list — indistinguishable from "this file legitimately + /// declares nothing" in the map output. + private func reportFatalDiagnostics(uri: String, filePath: String) { + let diagnostics = lsp.takeDiagnostics(uri: uri) + for diagnostic in diagnostics { + // LSP DiagnosticSeverity: 1 = Error, 2 = Warning, 3 = Information, 4 = Hint + guard (diagnostic["severity"] as? Int) == 1, + let message = diagnostic["message"] as? String + else { continue } + let line = ((diagnostic["range"] as? [String: Any])?["start"] as? [String: Any])?["line"] as? Int + let location = line.map { ":\($0 + 1)" } ?? "" + fputs("Warning: \(filePath)\(location): \(message)\n", stderr) + } + } + + private func declarationParser(for language: SourceLanguage) -> DeclarationParser { + switch language { + case .swift: return SwiftDeclarationParser() + case .cFamily: return CFamilyDeclarationParser() + } } private func flatten( _ symbols: [[String: Any]], parent: String?, + containerKind: SymbolKind?, filePath: String, targetName: String, sourceBytes: [UInt8], - lineOffsets: [Int] - ) { + lineOffsets: [Int], + parser: DeclarationParser, + language: SourceLanguage + ) -> Int { + var registeredCount = 0 for raw in symbols { guard let name = raw["name"] as? String, let kind = raw["kind"] as? Int, @@ -36,29 +98,46 @@ struct SymbolExtractor { let rangeStart = position(rangeDict["start"] as? [String: Any]) let selStart = position(selDict["start"] as? [String: Any]) - let startOffset = offsetFor(line: rangeStart.line, char: rangeStart.char, offsets: lineOffsets) - let sig = extractSignature(sourceBytes: sourceBytes, startOffset: startOffset) + let startOffset = startOffsetForSymbol( + rangeStart: rangeStart, + parent: parent, + offsets: lineOffsets, + language: language + ) + let sig = parser.extractSignature(sourceBytes: sourceBytes, startOffset: startOffset) + + let parsed = parser.parse(sig, lspKind: kind, name: name) + + // Method/function children are parameters or locals, not API + // surface, and clangd commonly reports them as variables/properties. + if containerKind == .function || containerKind == .initializer { + continue + } - let parsed = parseSignature(sig, lspKind: kind, name: name) let qualifiedName = parent.map { "\($0).\(name)" } ?? name - if parsed.isExtension { - // Register extension metadata; methods inside will use base type as parent - let ext = ExtensionInfo(baseType: name, conformances: parsed.conformances) + if parsed.isExtension || parsed.kind == .extension { + // For Swift `extension Foo: Proto {}` the symbol's own `name` is the + // base type. For an Objective-C category `@interface Foo (Bar)`, + // `CFamilyDeclarationParser` stashes the base type in `returnType` + // (its `name` is the category's own name, not the type it extends). + let baseType = parsed.returnType ?? name + let ext = ExtensionInfo(baseType: baseType, conformances: parsed.conformances) symbolTable.registerExtension(ext, filePath: filePath) + registeredCount += 1 - // Recurse with name as the "parent" so methods get attributed to the base type + // Recurse with the base type as "parent" so methods get attributed to it let children = raw["children"] as? [[String: Any]] ?? [] - flatten(children, parent: name, filePath: filePath, targetName: targetName, - sourceBytes: sourceBytes, lineOffsets: lineOffsets) + registeredCount += flatten(children, parent: baseType, containerKind: .class, filePath: filePath, targetName: targetName, + sourceBytes: sourceBytes, lineOffsets: lineOffsets, parser: parser, language: language) continue } // Skip enum cases, type aliases, imports etc. guard parsed.kind != .unknown else { let children = raw["children"] as? [[String: Any]] ?? [] - flatten(children, parent: parent ?? name, filePath: filePath, targetName: targetName, - sourceBytes: sourceBytes, lineOffsets: lineOffsets) + registeredCount += flatten(children, parent: parent ?? name, containerKind: containerKind, filePath: filePath, targetName: targetName, + sourceBytes: sourceBytes, lineOffsets: lineOffsets, parser: parser, language: language) continue } @@ -81,14 +160,16 @@ struct SymbolExtractor { selectionChar: selStart.char ) symbolTable.registerSymbol(sym) + registeredCount += 1 let children = raw["children"] as? [[String: Any]] ?? [] let childParent = (parsed.kind == .function || parsed.kind == .initializer || parsed.kind == .property) ? parent : name - flatten(children, parent: childParent, filePath: filePath, targetName: targetName, - sourceBytes: sourceBytes, lineOffsets: lineOffsets) + registeredCount += flatten(children, parent: childParent, containerKind: parsed.kind, filePath: filePath, targetName: targetName, + sourceBytes: sourceBytes, lineOffsets: lineOffsets, parser: parser, language: language) } + return registeredCount } // MARK: - Helpers @@ -112,162 +193,24 @@ struct SymbolExtractor { return offsets[line] + char } - private func extractSignature(sourceBytes: [UInt8], startOffset: Int) -> String { - var parenDepth = 0 - var bracketDepth = 0 - var i = startOffset - while i < sourceBytes.count { - let b = sourceBytes[i] - if b == UInt8(ascii: "{") && parenDepth == 0 && bracketDepth == 0 { break } - // Stop at newline for protocol requirements / computed property stubs - if b == UInt8(ascii: "\n") && parenDepth == 0 && bracketDepth == 0 { - // Check if next non-whitespace is `{` — if not, this is a one-liner with no body - var j = i + 1 - while j < sourceBytes.count && (sourceBytes[j] == 0x20 || sourceBytes[j] == 0x09) { j += 1 } - if j < sourceBytes.count && sourceBytes[j] != UInt8(ascii: "{") { break } + private func startOffsetForSymbol( + rangeStart: (line: Int, char: Int), + parent: String?, + offsets: [Int], + language: SourceLanguage + ) -> Int { + switch language { + case .swift: + return offsetFor(line: rangeStart.line, char: rangeStart.char, offsets: offsets) + case .cFamily: + // clangd often anchors top-level Objective-C symbols on the type + // token instead of the `@interface` / `@implementation` prefix. + // Starting from the declaration line gives the parser the full + // header text and stabilizes category/class-extension detection. + if parent == nil { + return offsetFor(line: rangeStart.line, char: 0, offsets: offsets) } - switch b { - case UInt8(ascii: "("): parenDepth += 1 - case UInt8(ascii: ")"): if parenDepth > 0 { parenDepth -= 1 } - case UInt8(ascii: "["): bracketDepth += 1 - case UInt8(ascii: "]"): if bracketDepth > 0 { bracketDepth -= 1 } - default: break - } - i += 1 + return offsetFor(line: rangeStart.line, char: rangeStart.char, offsets: offsets) } - - let sigBytes = Array(sourceBytes[startOffset.. ParsedDecl { - var result = ParsedDecl( - kind: lspKindToSymbolKind(lspKind, sig: sig), - accessLevel: "", - isAsync: false, - isThrows: false, - isExtension: sig.hasPrefix("extension ") || sig.contains(" extension "), - returnType: nil, - superclass: nil, - conformances: [] - ) - - // Access level - let lowSig = sig - if lowSig.contains("public ") || lowSig.contains("open ") { - result.accessLevel = "pub" - } else if lowSig.contains("private ") || lowSig.contains("fileprivate ") { - result.accessLevel = "priv" - } - - result.isAsync = sig.contains(" async") || sig.contains(" async\n") - result.isThrows = sig.contains(" throws") || sig.contains(" rethrows") - - // Return type — after last `->` - if let arrowRange = sig.range(of: "->", options: .backwards) { - let candidate = String(sig[arrowRange.upperBound...]).trimmingCharacters(in: .whitespaces) - if !candidate.isEmpty { result.returnType = candidate } - } - - // Inheritance / conformances — after `:` in type/extension declarations - if result.kind == .class || result.kind == .struct || result.kind == .enum || - result.kind == .actor || result.kind == .protocol || result.isExtension { - if let colonRange = findColon(in: sig, after: name) { - let inherited = String(sig[sig.index(after: colonRange)...]) - .trimmingCharacters(in: .whitespaces) - let parts = inherited.components(separatedBy: ",") - .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } - .filter { !$0.isEmpty && !$0.contains(" where ") } - .map { stripGenericConstraints($0) } - .filter { !$0.isEmpty } - - if result.kind == .class && !parts.isEmpty { - // Heuristic: first item is superclass for class declarations - // Only if it looks like a concrete type (starts uppercase, no Protocol suffix convention) - let first = parts[0] - result.superclass = first - result.conformances = parts.count > 1 ? Array(parts[1...]) : [] - } else { - result.conformances = parts - } - } - } - - return result - } - - private func lspKindToSymbolKind(_ kind: Int, sig: String) -> SymbolKind { - if sig.hasPrefix("extension ") || sig.contains(" extension ") { return .extension } - switch kind { - case 5: // Class — also used for actors sometimes - if sig.contains("actor ") { return .actor } - return .class - case 6, 12: return .function // Method, Function - case 7, 8: return .property // Property, Field - case 9: return .initializer - case 10: return .enum - case 11: return .protocol // Interface - case 13: return .property // Variable - case 23: return .struct - default: - if sig.contains("actor ") { return .actor } - if sig.contains("struct ") { return .struct } - if sig.contains("class ") { return .class } - if sig.contains("enum ") { return .enum } - if sig.contains("protocol ") { return .protocol } - if sig.contains("func ") { return .function } - if sig.contains("init(") || sig.contains("init<") { return .initializer } - return .unknown - } - } - - private func findColon(in sig: String, after name: String) -> String.Index? { - // Find `:` that comes after the type name, outside angle brackets - var depth = 0 - var i = sig.startIndex - - // Skip past the name first - if let nameRange = sig.range(of: name) { - i = nameRange.upperBound - } - - while i < sig.endIndex { - let c = sig[i] - switch c { - case "<": depth += 1 - case ">": if depth > 0 { depth -= 1 } - case ":": - if depth == 0 { return i } - case "(": - // Once we're inside parameter list, no more inheritance - return nil - default: break - } - i = sig.index(after: i) - } - return nil - } - - private func stripGenericConstraints(_ s: String) -> String { - // Remove trailing generic constraints like `where T: Something` - if let whereRange = s.range(of: " where ") { - return String(s[..> and << call graph lines (faster, fewer tokens).") var noCalls: Bool = false @@ -31,6 +34,18 @@ struct CodeMapper: ParsableCommand { @Flag(name: .long, help: "Print map output to stdout instead of writing .map files.") var stdout: Bool = false + @Option(name: .long, help: "Write a canonical API-shape snapshot JSON to this path. Use '-' for stdout.") + var snapshotOut: String? + + @Option(name: .long, help: "Compare the current extracted shape against a previously written snapshot JSON.") + var compareSnapshot: String? + + @Flag(name: .long, help: "Skip .map generation and only emit/compare shape snapshots.") + var shapeOnly: Bool = false + + @Flag(name: .long, help: "Exit non-zero when a shape comparison finds differences.") + var failOnDiff: Bool = false + mutating func run() throws { let packageRoot = (sources as NSString).standardizingPath @@ -39,17 +54,28 @@ struct CodeMapper: ParsableCommand { log("LSP: \(lspPath)") let symbolTable = SymbolTable() + let resolvedPath = path.map { ($0 as NSString).standardizingPath } + let resolvedExcludePaths = excludePath.map { ($0 as NSString).standardizingPath } let walker = SourceWalker(packageRoot: packageRoot, filter: filter) - let files = walker.discoverFiles(symbolTable: symbolTable) + let discoveredFiles = walker.discoverFiles(symbolTable: symbolTable) + let files = discoveredFiles.filter { filePath, _, _ in + pathIsIncluded(filePath, includePath: resolvedPath, excludePaths: resolvedExcludePaths) + } guard !files.isEmpty else { - log("No Swift files found.") + log("No matching source files found.") return } log("Found \(files.count) Swift files.") - let lsp = try LSPClient(lspPath: lspPath, projectRoot: packageRoot) + let compilationDatabase = try CompilationDatabaseWriter(directory: "/tmp/codemapper-compile-commands") + .write(files: files) + if let compilationDatabase { + log("Wrote compilation database for C-family files: \(compilationDatabase.directory)/compile_commands.json") + } + + let lsp = try LSPClient(lspPath: lspPath, projectRoot: packageRoot, compileCommandsDir: compilationDatabase?.directory) log("Initializing LSP...") try lsp.initialize() log("LSP ready.") @@ -57,10 +83,11 @@ struct CodeMapper: ParsableCommand { let extractor = SymbolExtractor(lsp: lsp, symbolTable: symbolTable) log("Pass 1: extracting symbols...") - for (i, (filePath, targetName)) in files.enumerated() { + for (i, (filePath, targetName, language)) in files.enumerated() { if (i + 1) % 20 == 0 { log(" \(i + 1)/\(files.count)") } do { - try extractor.process(filePath: filePath, targetName: targetName) + let parsePath = compilationDatabase?.parsePathByOriginalPath[filePath] + try extractor.process(filePath: filePath, targetName: targetName, language: language, parsePath: parsePath) } catch { fputs("Warning: symbol extraction failed for \(filePath): \(error)\n", stderr) } @@ -70,7 +97,7 @@ struct CodeMapper: ParsableCommand { if !noCalls { let callBuilder = CallGraphBuilder(lsp: lsp, symbolTable: symbolTable) log("Pass 1b: building call graph (may be slow on first run)...") - for (i, (filePath, _)) in files.enumerated() { + for (i, (filePath, _, _)) in files.enumerated() { if (i + 1) % 10 == 0 { log(" \(i + 1)/\(files.count)") } do { try callBuilder.process(filePath: filePath) @@ -85,15 +112,49 @@ struct CodeMapper: ParsableCommand { symbolTable.buildReverseIndex() symbolTable.buildImplementorMap() - let resolvedPath = path.map { ($0 as NSString).standardizingPath } - let writer = OutputWriter(symbolTable: symbolTable, packageRoot: packageRoot, includeCalls: !noCalls, outgoingOnly: outgoingOnly, pathFilter: resolvedPath) + let snapshot = ShapeSnapshot.build( + symbolTable: symbolTable, + packageRoot: packageRoot, + pathFilter: resolvedPath, + excludePaths: resolvedExcludePaths + ) - if stdout { - try writer.printAll() - } else { - log("Writing .map files...") - try writer.writeAll() - log("Done. Wrote \(symbolTable.fileTargets.count) .swift.map files.") + if let snapshotOut { + let data = try JSONEncoder.pretty.encode(snapshot) + if snapshotOut == "-" { + FileHandle.standardOutput.write(data) + } else { + try data.write(to: URL(fileURLWithPath: snapshotOut)) + } + } + + if let compareSnapshot { + let data = try Data(contentsOf: URL(fileURLWithPath: compareSnapshot)) + let baseline = try JSONDecoder().decode(ShapeSnapshot.self, from: data) + let diff = ShapeDiff.compare(baseline: baseline, candidate: snapshot) + FileHandle.standardOutput.write(Data(diff.render(baselineLabel: baseline.root, candidateLabel: snapshot.root).utf8)) + if failOnDiff && diff.hasDifferences { + throw ExitCode(2) + } + } + + let writer = OutputWriter( + symbolTable: symbolTable, + packageRoot: packageRoot, + includeCalls: !noCalls, + outgoingOnly: outgoingOnly, + pathFilter: resolvedPath, + excludePaths: resolvedExcludePaths + ) + + if !shapeOnly { + if stdout { + try writer.printAll() + } else { + log("Writing .map files...") + try writer.writeAll() + log("Done. Wrote \(symbolTable.fileTargets.count) .swift.map files.") + } } try lsp.shutdownGracefully() @@ -113,3 +174,11 @@ if CommandLine.arguments.contains("--signature") { } CodeMapper.main() + +private extension JSONEncoder { + static var pretty: JSONEncoder { + let encoder = JSONEncoder() + encoder.outputFormatting = [.prettyPrinted, .sortedKeys] + return encoder + } +} diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..8643edb --- /dev/null +++ b/build.sh @@ -0,0 +1,12 @@ +#!/bin/bash +LIBAVCODEC_MAJOR=$(pkg-config --modversion libavcodec 2>/dev/null | cut -d. -f1) +export LIBAVCODEC_MAJOR="${LIBAVCODEC_MAJOR:-0}" +OUTFILE=$(mktemp /tmp/build_output_XXXXXX.txt) +swift build >"$OUTFILE" 2>&1 +STATUS=$? +grep -E "^/home/$(whoami)/Projects/CodeMapper/Sources/.*error:" "$OUTFILE" | sed "s|/home/$(whoami)/Projects/CodeMapper/||" | head -10 +if [ $STATUS -eq 0 ]; then + echo "Build succeeded." +else + echo "Build FAILED. Full output: $OUTFILE" +fi diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 0000000..8b0ad51 --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,53 @@ +#!/bin/bash +set -u + +LIBAVCODEC_MAJOR=$(pkg-config --modversion libavcodec 2>/dev/null | cut -d. -f1) +export LIBAVCODEC_MAJOR="${LIBAVCODEC_MAJOR:-0}" + +OUTFILE=$(mktemp /tmp/test_output_XXXXXX.txt) +START_TIME=$SECONDS + +TEST_FILTER="${1:-}" +shift $(( $# > 0 ? 1 : 0 )) + +SWIFT_TEST_ARGS=() +if [ -n "$TEST_FILTER" ]; then + SWIFT_TEST_ARGS+=(--filter "$TEST_FILTER") +fi +if [ $# -gt 0 ]; then + SWIFT_TEST_ARGS+=("$@") +fi + +swift test "${SWIFT_TEST_ARGS[@]}" >"$OUTFILE" 2>&1 +STATUS=$? +ELAPSED=$((SECONDS - START_TIME)) + +if [ -n "$TEST_FILTER" ]; then + echo "Filter: $TEST_FILTER" +else + echo "Filter: all tests" +fi + +SUMMARY_LINE=$( + grep -E "Executed [0-9]+ test(s)?, with [0-9]+ failures" "$OUTFILE" | tail -1 +) +SUITE_RESULT_LINE=$( + grep -E "Test Suite '([^']+|Selected tests|All tests)' (passed|failed)" "$OUTFILE" | tail -1 +) +if [ -n "$SUMMARY_LINE" ]; then + TOTAL_TESTS=$(printf '%s\n' "$SUMMARY_LINE" | awk '{print $2}') + TOTAL_FAILURES=$(printf '%s\n' "$SUMMARY_LINE" | awk '{print $5}') + echo "Total: $TOTAL_TESTS Failures: $TOTAL_FAILURES" +else + echo "Total: unknown Failures: unknown" +fi +if [ -n "$SUITE_RESULT_LINE" ]; then + echo "Result: $SUITE_RESULT_LINE" +fi +echo "Elapsed: ${ELAPSED}s" +echo "Build / compile errors:" +grep -E "^/home/$(whoami)/Projects/CodeMapper/(Sources|Tests)/.*error:" "$OUTFILE" | sed "s|/home/$(whoami)/Projects/CodeMapper/||" | head -10 || echo " none" +echo "Failed / crashed:" +grep "FAILED\|Fatal error\|Exited with unexpected\| failed (" "$OUTFILE" || echo " none" +echo "Full output: $OUTFILE" +exit $STATUS