Read PDF Form

Reads a PDF form and returns all field values, the available field names, the required field names, and whether every required field has been filled out.

Script nextcloud Verified

by nextcloud · 4/21/2026

The script

Submitted by nextcloud Bun
Verified 23 days ago
1
import axios from "axios";
2
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
3

4
/** Value extracted from a form field: text, a checkbox state, or `null` when no value is read. */
type FieldValue = boolean | string | null;

/** Coarse field classification used when deciding whether a field counts as filled. */
type FieldKind = "checkbox" | "text" | "other";
7

8
/**
 * Converts any value to its string form and strips surrounding whitespace.
 * `null` and `undefined` become the empty string.
 */
function trimStr(v: unknown): string {
  const text = v === null || v === undefined ? "" : String(v);
  return text.trim();
}
11

12
/**
 * Decides whether a field value counts as “filled” for PDF-required validation.
 *
 * - `"checkbox"`: any boolean counts (checked or unchecked); other values do not.
 * - `"text"`: only a string with at least one non-whitespace character counts.
 * - any other kind: a non-null value whose string form is not blank counts.
 */
function isFilled(value: FieldValue, kind: FieldKind): boolean {
  switch (kind) {
    case "checkbox":
      return typeof value === "boolean";
    case "text":
      return typeof value === "string" && value.trim() !== "";
    default: {
      if (value == null) return false;
      return String(value).trim() !== "";
    }
  }
}
20

21
/**
 * The subset of annotation properties this script reads from the objects
 * returned by `page.getAnnotations()`. Every property is optional.
 */
type WidgetAnnotation = {
  /** Field name; annotations without one are skipped by the loader. */
  fieldName?: string;
  /** Field type tag; the script distinguishes "Tx", "Btn" and "Ch". */
  fieldType?: string;
  /** Raw field value as reported by the annotation. */
  fieldValue?: string | null;
  /** Marks a "Btn" field as a checkbox. */
  checkBox?: boolean;
  /** Explicit required marker; wins over `fieldFlags` whenever it is a boolean. */
  required?: boolean;
  /** Numeric field flags; the required bit is consulted when `required` is not a boolean. */
  fieldFlags?: number;
};
29

30
/** Bit mask tested against `fieldFlags` to detect a required field. */
const FIELD_FLAG_REQUIRED = 2;

/**
 * Tells whether an annotation marks its field as required.
 *
 * A boolean `required` property is authoritative. Otherwise the field is
 * required only when `fieldFlags` is a number with the required bit set.
 */
function isAnnotationRequired(a: WidgetAnnotation): boolean {
  const explicit = a.required;
  if (explicit === true || explicit === false) return explicit;

  const flags = a.fieldFlags;
  if (typeof flags !== "number") return false;
  return (flags & FIELD_FLAG_REQUIRED) !== 0;
}
37

38
/**
 * Classifies an annotation by its field type: "Tx" is text, a "Btn" with a
 * truthy `checkBox` is a checkbox, and everything else is "other".
 */
function kindFromAnnotation(a: WidgetAnnotation): FieldKind {
  if (a.fieldType === "Tx") return "text";
  const isCheckbox = a.fieldType === "Btn" && Boolean(a.checkBox);
  return isCheckbox ? "checkbox" : "other";
}
48

49
/**
 * Extracts the script-level value of an annotation.
 *
 * - "Tx" and "Ch" fields yield the trimmed string form of `fieldValue`
 *   (empty string when it is null or undefined).
 * - A "Btn" checkbox yields `false` when its value is missing, empty or
 *   "Off", and `true` otherwise.
 * - Every other annotation yields `null`.
 */
function valueFromAnnotation(a: WidgetAnnotation): FieldValue {
  switch (a.fieldType) {
    case "Tx":
    case "Ch":
      return trimStr(a.fieldValue);
    case "Btn": {
      if (!a.checkBox) return null;
      const state = a.fieldValue == null ? "" : String(a.fieldValue);
      return state !== "" && state !== "Off";
    }
    default:
      return null;
  }
}
64

65
/**
 * Parses a PDF with pdf.js and collects form-field data from the annotations
 * on every page.
 *
 * @param pdfBytes Raw bytes of the PDF document.
 * @param password Password handed to pdf.js (empty string when there is none).
 * @returns Field values and kinds keyed by field name, plus the sorted list of
 *   all field names and the sorted list of names flagged as required.
 * @throws Error with a password hint when loading fails with a message that
 *   mentions "password"; any other load error is rethrown unchanged.
 */
async function loadFormData(
  pdfBytes: Uint8Array,
  password: string,
): Promise<{
  values: Record<string, FieldValue>;
  kinds: Record<string, FieldKind>;
  available_fields: string[];
  required_fields: string[];
}> {
  const loadingTask = getDocument({
    data: pdfBytes,
    password,
    disableRange: true,
    disableStream: true,
    useSystemFonts: true,
  });

  try {
    let pdf: { numPages: number; getPage: (n: number) => Promise<{ getAnnotations: () => Promise<unknown[]> }> };
    try {
      pdf = await loadingTask.promise;
    } catch (e: unknown) {
      const msg = e instanceof Error ? e.message : String(e);
      if (/password/i.test(msg)) {
        throw new Error(
          "PDF needs a password or a different one. Set `pdfPassword` or use an unencrypted PDF.",
        );
      }
      throw e;
    }

    const kinds: Record<string, FieldKind> = {};
    const values: Record<string, FieldValue> = {};
    const requiredByName: Record<string, boolean> = {};

    for (let p = 1; p <= pdf.numPages; p++) {
      const page = await pdf.getPage(p);
      const annotations = (await page.getAnnotations()) as WidgetAnnotation[];

      for (const a of annotations) {
        const name = a.fieldName?.trim();
        if (!name) continue;

        // When several annotations share a name, the last one seen decides
        // the recorded kind and value.
        kinds[name] = kindFromAnnotation(a);
        values[name] = valueFromAnnotation(a);

        // A field is required as soon as any of its annotations says so;
        // a later non-required annotation never downgrades it.
        if (isAnnotationRequired(a)) requiredByName[name] = true;
        else if (requiredByName[name] === undefined) requiredByName[name] = false;
      }
    }

    const available_fields = Object.keys(values).sort();
    const required_fields = Object.keys(requiredByName)
      .filter((n) => requiredByName[n])
      .sort();

    return { values, kinds, available_fields, required_fields };
  } finally {
    // Release the document (and its worker) on success and on failure alike.
    await loadingTask.destroy();
  }
}
124

125
/**
 * Downloads a PDF from Nextcloud over WebDAV and reports the state of its form.
 *
 * @param nextcloud Nextcloud resource; `baseUrl`, `userId` and `token` are used.
 * @param pdfPath Path of the PDF below the user's files root, given unencoded
 *   (for example `Documents/My Form #2.pdf`). Leading slashes are ignored.
 * @param pdfPassword Optional PDF password; `null` means no password.
 * @returns The field values by name, the sorted names of all fields and of the
 *   required fields, and whether every required field counts as filled.
 * @throws Error when the download answers with a status other than 200, and
 *   whatever the request or the PDF parsing throws.
 */
export async function main(
  nextcloud: RT.Nextcloud,
  pdfPath: string,
  pdfPassword: string | null = null,
): Promise<{
  values: Record<string, FieldValue>;
  available_fields: string[];
  required_fields: string[];
  filled_out: boolean;
}> {
  const baseUrl = String(nextcloud.baseUrl || "").replace(/\/+$/, "");

  // Encode each path segment on its own so characters such as `#`, `?`, `%`
  // or spaces in folder and file names cannot change the meaning of the URL,
  // while the `/` separators stay intact.
  const encodedPath = pdfPath
    .replace(/^\/+/, "")
    .split("/")
    .map((segment) => encodeURIComponent(segment))
    .join("/");

  const getRes = await axios.get(
    `${baseUrl}/remote.php/dav/files/${encodeURIComponent(nextcloud.userId)}/${encodedPath}`,
    {
      auth: {
        username: nextcloud.userId,
        password: nextcloud.token,
      },
      responseType: "arraybuffer",
    },
  );

  // NOTE(review): axios normally rejects non-2xx responses on its own; this
  // check additionally refuses any other status that is not exactly 200.
  if (getRes.status !== 200) {
    throw new Error(`Failed to download PDF (HTTP ${getRes.status}) ${getRes.statusText}`);
  }

  const pdfBytes = new Uint8Array(getRes.data as ArrayBuffer);
  const { values, kinds, available_fields, required_fields } = await loadFormData(
    pdfBytes,
    pdfPassword ?? "",
  );

  const filled_out = required_fields.every((name) =>
    isFilled(values[name], kinds[name] ?? "other"),
  );

  return { values, available_fields, required_fields, filled_out };
}
162

  • Submitted by nextcloud Bun
    Created 23 days ago
    This script is a newer edit of the script that had been approved, but it needs to be re-approved
    1
    import axios from "axios";
    2
    
    
    3
    /** Value extracted from a form field: text, a checkbox state, or `null` when no value is read. */
    type FieldValue = boolean | string | null;

    /** Coarse field classification used when deciding whether a field counts as filled. */
    type FieldKind = "checkbox" | "text" | "other";
    6
    
    
    7
    /**
     * Converts any value to its string form and strips surrounding whitespace.
     * `null` and `undefined` become the empty string.
     */
    function trimStr(v: unknown): string {
      const text = v === null || v === undefined ? "" : String(v);
      return text.trim();
    }
    10
    
    
    11
    /**
     * Decides whether a field value counts as “filled” for PDF-required validation.
     *
     * - `"checkbox"`: any boolean counts (checked or unchecked); other values do not.
     * - `"text"`: only a string with at least one non-whitespace character counts.
     * - any other kind: a non-null value whose string form is not blank counts.
     */
    function isFilled(value: FieldValue, kind: FieldKind): boolean {
      switch (kind) {
        case "checkbox":
          return typeof value === "boolean";
        case "text":
          return typeof value === "string" && value.trim() !== "";
        default: {
          if (value == null) return false;
          return String(value).trim() !== "";
        }
      }
    }
    19
    
    
    20
    /**
     * The subset of annotation properties this script reads from the objects
     * returned by `page.getAnnotations()`. Every property is optional.
     */
    type WidgetAnnotation = {
      /** Field name; annotations without one are skipped by the loader. */
      fieldName?: string;
      /** Field type tag; the script distinguishes "Tx", "Btn" and "Ch". */
      fieldType?: string;
      /** Raw field value as reported by the annotation. */
      fieldValue?: string | null;
      /** Marks a "Btn" field as a checkbox. */
      checkBox?: boolean;
      /** Explicit required marker; wins over `fieldFlags` whenever it is a boolean. */
      required?: boolean;
      /** Numeric field flags; the required bit is consulted when `required` is not a boolean. */
      fieldFlags?: number;
    };
    28
    
    
    29
    /** Bit mask tested against `fieldFlags` to detect a required field. */
    const FIELD_FLAG_REQUIRED = 2;

    /**
     * Tells whether an annotation marks its field as required.
     *
     * A boolean `required` property is authoritative. Otherwise the field is
     * required only when `fieldFlags` is a number with the required bit set.
     */
    function isAnnotationRequired(a: WidgetAnnotation): boolean {
      const explicit = a.required;
      if (explicit === true || explicit === false) return explicit;

      const flags = a.fieldFlags;
      if (typeof flags !== "number") return false;
      return (flags & FIELD_FLAG_REQUIRED) !== 0;
    }
    36
    
    
    37
    /**
     * Classifies an annotation by its field type: "Tx" is text, a "Btn" with a
     * truthy `checkBox` is a checkbox, and everything else is "other".
     */
    function kindFromAnnotation(a: WidgetAnnotation): FieldKind {
      if (a.fieldType === "Tx") return "text";
      const isCheckbox = a.fieldType === "Btn" && Boolean(a.checkBox);
      return isCheckbox ? "checkbox" : "other";
    }
    47
    
    
    48
    /**
     * Extracts the script-level value of an annotation.
     *
     * - "Tx" and "Ch" fields yield the trimmed string form of `fieldValue`
     *   (empty string when it is null or undefined).
     * - A "Btn" checkbox yields `false` when its value is missing, empty or
     *   "Off", and `true` otherwise.
     * - Every other annotation yields `null`.
     */
    function valueFromAnnotation(a: WidgetAnnotation): FieldValue {
      switch (a.fieldType) {
        case "Tx":
        case "Ch":
          return trimStr(a.fieldValue);
        case "Btn": {
          if (!a.checkBox) return null;
          const state = a.fieldValue == null ? "" : String(a.fieldValue);
          return state !== "" && state !== "Off";
        }
        default:
          return null;
      }
    }
    63
    
    
    64
    /**
     * Installs a `DOMMatrix` global when the runtime lacks one.
     *
     * pdf.js expects browser globals; Windmill workers do not provide `DOMMatrix`.
     * The constructor is taken from the `dommatrix` package (its default export,
     * falling back to the named `DOMMatrix` export) and defined on `globalThis`
     * as a configurable property. Nothing happens when the global already exists
     * or the package exposes no usable constructor.
     */
    async function ensureDomMatrixPolyfill(): Promise<void> {
      type DomMatrixCtor = typeof globalThis.DOMMatrix;

      const alreadyPresent = typeof globalThis.DOMMatrix !== "undefined";
      if (alreadyPresent) return;

      const loaded = await import("dommatrix");
      const fromDefault = (loaded as { default?: DomMatrixCtor }).default;
      const ctor = fromDefault ?? (loaded as { DOMMatrix: DomMatrixCtor }).DOMMatrix;
      if (typeof ctor !== "function") return;

      Object.defineProperty(globalThis, "DOMMatrix", { value: ctor, configurable: true });
    }
    73
    
    
    74
    /**
     * Parses a PDF with pdf.js and collects form-field data from the annotations
     * on every page.
     *
     * @param pdfBytes Raw bytes of the PDF document.
     * @param password Password handed to pdf.js (empty string when there is none).
     * @returns Field values and kinds keyed by field name, plus the sorted list of
     *   all field names and the sorted list of names flagged as required.
     * @throws Error with a password hint when loading fails with a message that
     *   mentions "password"; any other load error is rethrown unchanged.
     */
    async function loadFormData(
      pdfBytes: Uint8Array,
      password: string,
    ): Promise<{
      values: Record<string, FieldValue>;
      kinds: Record<string, FieldKind>;
      available_fields: string[];
      required_fields: string[];
    }> {
      // The polyfill is installed before pdf.js is loaded; presumably pdf.js
      // needs the `DOMMatrix` global to exist by then.
      await ensureDomMatrixPolyfill();
      const { getDocument } = await import("pdfjs-dist/legacy/build/pdf.mjs");

      const loadingTask = getDocument({
        data: pdfBytes,
        password,
        disableRange: true,
        disableStream: true,
        useSystemFonts: true,
      });

      try {
        let pdf: { numPages: number; getPage: (n: number) => Promise<{ getAnnotations: () => Promise<unknown[]> }> };
        try {
          pdf = await loadingTask.promise;
        } catch (e: unknown) {
          const msg = e instanceof Error ? e.message : String(e);
          if (/password/i.test(msg)) {
            throw new Error(
              "PDF needs a password or a different one. Set `pdfPassword` or use an unencrypted PDF.",
            );
          }
          throw e;
        }

        const kinds: Record<string, FieldKind> = {};
        const values: Record<string, FieldValue> = {};
        const requiredByName: Record<string, boolean> = {};

        for (let p = 1; p <= pdf.numPages; p++) {
          const page = await pdf.getPage(p);
          const annotations = (await page.getAnnotations()) as WidgetAnnotation[];

          for (const a of annotations) {
            const name = a.fieldName?.trim();
            if (!name) continue;

            // When several annotations share a name, the last one seen decides
            // the recorded kind and value.
            kinds[name] = kindFromAnnotation(a);
            values[name] = valueFromAnnotation(a);

            // A field is required as soon as any of its annotations says so;
            // a later non-required annotation never downgrades it.
            if (isAnnotationRequired(a)) requiredByName[name] = true;
            else if (requiredByName[name] === undefined) requiredByName[name] = false;
          }
        }

        const available_fields = Object.keys(values).sort();
        const required_fields = Object.keys(requiredByName)
          .filter((n) => requiredByName[n])
          .sort();

        return { values, kinds, available_fields, required_fields };
      } finally {
        // Release the document (and its worker) on success and on failure alike.
        await loadingTask.destroy();
      }
    }
    136
    
    
    137
    /**
     * Downloads a PDF from Nextcloud over WebDAV and reports the state of its form.
     *
     * @param nextcloud Nextcloud resource; `baseUrl`, `userId` and `token` are used.
     * @param pdfPath Path of the PDF below the user's files root, given unencoded
     *   (for example `Documents/My Form #2.pdf`). Leading slashes are ignored.
     * @param pdfPassword Optional PDF password; `null` means no password.
     * @returns The field values by name, the sorted names of all fields and of the
     *   required fields, and whether every required field counts as filled.
     * @throws Error when the download answers with a status other than 200, and
     *   whatever the request or the PDF parsing throws.
     */
    export async function main(
      nextcloud: RT.Nextcloud,
      pdfPath: string,
      pdfPassword: string | null = null,
    ): Promise<{
      values: Record<string, FieldValue>;
      available_fields: string[];
      required_fields: string[];
      filled_out: boolean;
    }> {
      const baseUrl = String(nextcloud.baseUrl || "").replace(/\/+$/, "");

      // Encode each path segment on its own so characters such as `#`, `?`, `%`
      // or spaces in folder and file names cannot change the meaning of the URL,
      // while the `/` separators stay intact.
      const encodedPath = pdfPath
        .replace(/^\/+/, "")
        .split("/")
        .map((segment) => encodeURIComponent(segment))
        .join("/");

      const getRes = await axios.get(
        `${baseUrl}/remote.php/dav/files/${encodeURIComponent(nextcloud.userId)}/${encodedPath}`,
        {
          auth: {
            username: nextcloud.userId,
            password: nextcloud.token,
          },
          responseType: "arraybuffer",
        },
      );

      // NOTE(review): axios normally rejects non-2xx responses on its own; this
      // check additionally refuses any other status that is not exactly 200.
      if (getRes.status !== 200) {
        throw new Error(`Failed to download PDF (HTTP ${getRes.status}) ${getRes.statusText}`);
      }

      const pdfBytes = new Uint8Array(getRes.data as ArrayBuffer);
      const { values, kinds, available_fields, required_fields } = await loadFormData(
        pdfBytes,
        pdfPassword ?? "",
      );

      const filled_out = required_fields.every((name) =>
        isFilled(values[name], kinds[name] ?? "other"),
      );

      return { values, available_fields, required_fields, filled_out };
    }
    174