aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/hwmon/lm85.c
blob: 250d099ca3980e1dbb8131e4748610d137049658 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
/*
    lm85.c - Part of lm_sensors, Linux kernel modules for hardware
             monitoring
    Copyright (c) 1998, 1999  Frodo Looijaard <frodol@dds.nl>
    Copyright (c) 2002, 2003  Philip Pokorny <ppokorny@penguincomputing.com>
    Copyright (c) 2003        Margit Schubert-While <margitsw@t-online.de>
    Copyright (c) 2004        Justin Thiessen <jthiessen@penguincomputing.com>
    Copyright (C) 2007--2009  Jean Delvare <khali@linux-fr.org>

    Chip details at	      <http://www.national.com/ds/LM/LM85.pdf>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/i2c.h>
#include <linux/hwmon.h>
#include <linux/hwmon-vid.h>
#include <linux/hwmon-sysfs.h>
#include <linux/err.h>
#include <linux/mutex.h>

/* Addresses to scan */
static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };

enum chips {
	any_chip, lm85b, lm85c,
	adm1027, adt7463, adt7468,
	emc6d100, emc6d102, emc6d103, emc6d103s
};

/* The LM85 registers */

#define	LM85_REG_IN(nr)			(0x20 + (nr))
#define	LM85_REG_IN_MIN(nr)		(0x44 + (nr) * 2)
#define	LM85_REG_IN_MAX(nr)		(0x45 + (nr) * 2)

#define	LM85_REG_TEMP(nr)		(0x25 + (nr))
#define	LM85_REG_TEMP_MIN(nr)		(0x4e + (nr) * 2)
#define	LM85_REG_TEMP_MAX(nr)		(0x4f + (nr) * 2)

/* Fan speeds are LSB, MSB (2 bytes) */
#define	LM85_REG_FAN(nr)		(0x28 + (nr) * 2)
#define	LM85_REG_FAN_MIN(nr)		(0x54 + (nr) * 2)

#define	LM85_REG_PWM(nr)		(0x30 + (nr))

#define	LM85_REG_COMPANY		0x3e
#define	LM85_REG_VERSTEP		0x3f

#define	ADT7468_REG_CFG5		0x7c
#define		ADT7468_OFF64		(1 << 0)
#define		ADT7468_HFPWM		(1 << 1)
#define	IS_ADT7468_OFF64(data)		\
	((data)->type == adt7468 && !((data)->cfg5 & ADT7468_OFF64))
#define	IS_ADT7468_HFPWM(data)		\
	((data)->type == adt7468 && !((data)->cfg5 & ADT7468_HFPWM))

/* These are the recognized values for the above regs */
#define	LM85_COMPANY_NATIONAL		0x01
#define	LM85_COMPANY_ANALOG_DEV		0x41
#define	LM85_COMPANY_SMSC		0x5c
#define	LM85_VERSTEP_VMASK              0xf0
#define	LM85_VERSTEP_GENERIC		0x60
#define	LM85_VERSTEP_GENERIC2		0x70
#define	LM85_VERSTEP_LM85C		0x60
#define	LM85_VERSTEP_LM85B		0x62
#define	LM85_VERSTEP_LM96000_1		0x68
#define	LM85_VERSTEP_LM96000_2		0x69
#define	LM85_VERSTEP_ADM1027		0x60
#define	LM85_VERSTEP_ADT7463		0x62
#define	LM85_VERSTEP_ADT7463C		0x6A
#define	LM85_VERSTEP_ADT7468_1		0x71
#define	LM85_VERSTEP_ADT7468_2		0x72
#define	LM85_VERSTEP_EMC6D100_A0        0x60
#define	LM85_VERSTEP_EMC6D100_A1        0x61
#define	LM85_VERSTEP_EMC6D102		0x65
#define	LM85_VERSTEP_EMC6D103_A0	0x68
#define	LM85_VERSTEP_EMC6D103_A1	0x69
#define	LM85_VERSTEP_EMC6D103S		0x6A	/* Also known as EMC6D103:A2 */

#define	LM85_REG_CONFIG			0x40

#define	LM85_REG_ALARM1			0x41
#define	LM85_REG_ALARM2			0x42

#define	LM85_REG_VID			0x43

/* Automated FAN control */
#define	LM85_REG_AFAN_CONFIG(nr)	(0x5c + (nr))
#define	LM85_REG_AFAN_RANGE(nr)		(0x5f + (nr))
#define	LM85_REG_AFAN_SPIKE1		0x62
#define	LM85_REG_AFAN_MINPWM(nr)	(0x64 + (nr))
#define	LM85_REG_AFAN_LIMIT(nr)		(0x67 + (nr))
#define	LM85_REG_AFAN_CRITICAL(nr)	(0x6a + (nr))
#define	LM85_REG_AFAN_HYST1		0x6d
#define	LM85_REG_AFAN_HYST2		0x6e

#define	ADM1027_REG_EXTEND_ADC1		0x76
#define	ADM1027_REG_EXTEND_ADC2		0x77

#define EMC6D100_REG_ALARM3             0x7d
/* IN5, IN6 and IN7 */
#define	EMC6D100_REG_IN(nr)             (0x70 + ((nr) - 5))
#define	EMC6D100_REG_IN_MIN(nr)         (0x73 + ((nr) - 5) * 2)
#define	EMC6D100_REG_IN_MAX(nr)         (0x74 + ((nr) - 5) * 2)
#define	EMC6D102_REG_EXTEND_ADC1	0x85
#define	EMC6D102_REG_EXTEND_ADC2	0x86
#define	EMC6D102_REG_EXTEND_ADC3	0x87
#define	EMC6D102_REG_EXTEND_ADC4	0x88


/* Conversions. Rounding and limit checking is only done on the TO_REG
   variants. Note that you should be a bit careful with which arguments
   these macros are called: arguments may be evaluated more than once.
 */

/* IN are scaled according to built-in resistors */
static const int lm85_scaling[] = {  /* .001 Volts */
	2500, 2250, 3300, 5000, 12000,
	3300, 1500, 1800 /*EMC6D100*/
};
#define SCALE(val, from, to)	(((val) * (to) + ((from) / 2)) / (from))

#define INS_TO_REG(n, val)	\
		SENSORS_LIMIT(SCALE(val, lm85_scaling[n], 192), 0, 255)

#define INSEXT_FROM_REG(n, val, ext)	\
		SCALE(((val) << 4) + (ext), 192 << 4, lm85_scaling[n])

#define INS_FROM_REG(n, val)	SCALE((val), 192, lm85_scaling[n])

/* FAN speed is measured using 90kHz clock */
static inline u16 FAN_TO_REG(unsigned long val)
{
	if (!val)
		return 0xffff;
	return SENSORS_LIMIT(5400000 / val, 1, 0xfffe);
}
#define FAN_FROM_REG(val)	((val) == 0 ? -1 : (val) == 0xffff ? 0 : \
				 5400000 / (val))

/* Temperature is reported in .001 degC increments */
#define TEMP_TO_REG(val)	\
		SENSORS_LIMIT(SCALE(val, 1000, 1), -127, 127)
#define TEMPEXT_FROM_REG(val, ext)	\
		SCALE(((val) << 4) + (ext), 16, 1000)
#define TEMP_FROM_REG(val)	((val) * 1000)

#define PWM_TO_REG(val)			SENSORS_LIMIT(val, 0, 255)
#define PWM_FROM_REG(val)		(val)


/* ZONEs have the following parameters:
 *    Limit (low) temp,           1. degC
 *    Hysteresis (below limit),   1. degC (0-15)
 *    Range of speed control,     .1 degC (2-80)
 *    Critical (high) temp,       1. degC
 *
 * FAN PWMs have the following parameters:
 *    Reference Zone,                 1, 2, 3, etc.
 *    Spinup time,                    .05 sec
 *    PWM value at limit/low temp,    1 count
 *    PWM Frequency,                  1. Hz
 *    PWM is Min or OFF below limit,  flag
 *    Invert PWM output,              flag
 *
 * Some chips filter the temp, others the fan.
 *    Filter constant (or disabled)   .1 seconds
 */

/* These are the zone temperature range encodings in .001 degree C */
static const int lm85_range_map[] = {
	2000, 2500, 3300, 4000, 5000, 6600, 8000, 10000,
	13300, 16000, 20000, 26600, 32000, 40000, 53300, 80000
};

static int RANGE_TO_REG(int range)
{
	int i;

	/* Find the closest match */
	for (i = 0; i < 15; ++i) {
		if (range <= (lm85_range_map[i] + lm85_range_map[i + 1]) / 2)
			break;
	}

	return i;
}
#define RANGE_FROM_REG(val)	lm85_range_map[(val) & 0x0f]

/* These are the PWM frequency encodings */
static const int lm85_freq_map[8] = { /* 1 Hz */
	10, 15, 23, 30, 38, 47, 61, 94
};
static const int adm1027_freq_map[8] = { /* 1 Hz */
	11, 15, 22, 29, 35, 44, 59, 88
};

static int FREQ_TO_REG(const int *map, int freq)
{
	int i;

	/* Find the closest match */
	for (i = 0; i < 7; ++i)
		if (freq <= (map[i] + map[i + 1]) / 2)
			break;
	return i;
}

static int FREQ_FROM_REG(const int *map, u8 reg)
{
	return map[reg & 0x07];
}

/* Since we can't use strings, I'm abusing these numbers
 *   to stand in for the following meanings:
 *      1 -- PWM responds to Zone 1
 *      2 -- PWM responds to Zone 2
 *      3 -- PWM responds to Zone 3
 *     23 -- PWM responds to the higher temp of Zone 2 or 3
 *    123 -- PWM responds to highest of Zone 1, 2, or 3
 *      0 -- PWM is always at 0% (ie, off)
 *     -1 -- PWM is always at 100%
 *     -2 -- PWM responds to manual control
 */

static const int lm85_zone_map[] = { 1, 2, 3, -1, 0, 23, 123, -2 };
#define ZONE_FROM_REG(val)	lm85_zone_map[(val) >> 5]

static int ZONE_TO_REG(int zone)
{
	int i;

	for (i = 0; i <= 7; ++i)
		if (zone == lm85_zone_map[i])
			break;
	if (i > 7)   /* Not found. */
		i = 3;  /* Always 100% */
	return i << 5;
}

#define HYST_TO_REG(val)	SENSORS_LIMIT(((val) + 500) / 1000, 0, 15)
#define HYST_FROM_REG(val)	((val) * 1000)

/* Chip sampling rates
 *
 * Some sensors are not updated more frequently than once per second
 *    so it doesn't make sense to read them more often than that.
 *    We cache the results and return the saved data if the driver
 *    is called again before a second has elapsed.
 *
 * Also, there is significant configuration data for this chip
 *    given the automatic PWM fan control that is possible.  There
 *    are about 47 bytes of config data to only 22 bytes of actual
 *    readings.  So, we keep the config data up to date in the cache
 *    when it is written and only sample it once every 1 *minute*
 */
#define LM85_DATA_INTERVAL  (HZ + HZ / 2)
#define LM85_CONFIG_INTERVAL  (1 * 60 * HZ)

/* LM85 can automatically adjust fan speeds based on temperature
 * This structure encapsulates an entire Zone config.  There are
 * three zones (one for each temperature input) on the lm85
 */
struct lm85_zone {
	s8 limit;	/* Low temp limit */
	u8 hyst;	/* Low limit hysteresis. (0-15) */
	u8 range;	/* Temp range, encoded */
	s8 critical;	/* "All fans ON" temp limit */
	u8 max_desired; /* Actual "max" temperature specified.  Preserved
			 * to prevent "drift" as other autofan control
			 * values change.
			 */
};

struct lm85_autofan {
	u8 config;	/* Register value */
	u8 min_pwm;	/* Minimum PWM value, encoded */
	u8 min_off;	/* Min PWM or OFF below "limit", flag */
};

/* For each registered chip, we need to keep some data in memory.
   The structure is dynamically allocated. */
struct lm85_data {
	struct device *hwmon_dev;
	const int *freq_map;
	enum chips type;

	bool has_vid5;	/* true if VID5 is configured for ADT7463 or ADT7468 */

	struct mutex update_lock;
	int valid;		/* !=0 if following fields are valid */
	unsigned long last_reading;	/* In jiffies */
	unsigned long last_config;	/* In jiffies */

	u8 in[8];		/* Register value */
	u8 in_max[8];		/* Register value */
	u8 in_min[8];		/* Register value */
	s8 temp[3];		/* Register value */
	s8 temp_min[3];		/* Register value */
	s8 temp_max[3];		/* Register value */
	u16 fan[4];		/* Register value */
	u16 fan_min[4];		/* Register value */
	u8 pwm[3];		/* Register value */
	u8 pwm_freq[3];		/* Register encoding */
	u8 temp_ext[3];		/* Decoded values */
	u8 in_ext[8];		/* Decoded values */
	u8 vid;			/* Register value */
	u8 vrm;			/* VRM version */
	u32 alarms;		/* Register encoding, combined */
	u8 cfg5;		/* Config Register 5 on ADT7468 */
	struct lm85_autofan autofan[3];
	struct lm85_zone zone[3];
};

static int lm85_detect(struct i2c_client *client, struct i2c_board_info *info);
static int lm85_probe(struct i2c_client *client,
		      const struct i2c_device_id *id);
static int lm85_remove(struct i2c_client *client);

static int lm85_read_value(struct i2c_client *client, u8 reg);
static void lm85_write_value(struct i2c_client *client, u8 reg, int value);
static struct lm85_data *lm85_update_device(struct device *dev);


static const struct i2c_device_id lm85_id[] = {
	{ "adm1027", adm1027 },
	{ "adt7463", adt7463 },
	{ "adt7468", adt7468 },
	{ "lm85", any_chip },
	{ "lm85b", lm85b },
	{ "lm85c", lm85c },
	{ "emc6d100", emc6d100 },
	{ "emc6d101", emc6d100 },
	{ "emc6d102", emc6d102 },
	{ "emc6d103", emc6d103 },
	{ "emc6d103s", emc6d103s },
	{ }
};
MODULE_DEVICE_TABLE(i2c, lm85_id);

static struct i2c_driver lm85_driver = {
	.class		= I2C_CLASS_HWMON,
	.driver = {
		.name   = "lm85",
	},
	.probe		= lm85_probe,
	.remove		= lm85_remove,
	.id_table	= lm85_id,
	.detect		= lm85_detect,
	.address_list	= normal_i2c,
};


/* 4 Fans */
static ssize_t show_fan(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", FAN_FROM_REG(data->fan[nr]));
}

static ssize_t show_fan_min(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", FAN_FROM_REG(data->fan_min[nr]));
}

static ssize_t set_fan_min(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	unsigned long val = simple_strtoul(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	data->fan_min[nr] = FAN_TO_REG(val);
	lm85_write_value(client, LM85_REG_FAN_MIN(nr), data->fan_min[nr]);
	mutex_unlock(&data->update_lock);
	return count;
}

#define show_fan_offset(offset)						\
static SENSOR_DEVICE_ATTR(fan##offset##_input, S_IRUGO,			\
		show_fan, NULL, offset - 1);				\
static SENSOR_DEVICE_ATTR(fan##offset##_min, S_IRUGO | S_IWUSR,		\
		show_fan_min, set_fan_min, offset - 1)

show_fan_offset(1);
show_fan_offset(2);
show_fan_offset(3);
show_fan_offset(4);

/* vid, vrm, alarms */

static ssize_t show_vid_reg(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	struct lm85_data *data = lm85_update_device(dev);
	int vid;

	if (data->has_vid5) {
		/* 6-pin VID (VRM 10) */
		vid = vid_from_reg(data->vid & 0x3f, data->vrm);
	} else {
		/* 5-pin VID (VRM 9) */
		vid = vid_from_reg(data->vid & 0x1f, data->vrm);
	}

	return sprintf(buf, "%d\n", vid);
}

static DEVICE_ATTR(cpu0_vid, S_IRUGO, show_vid_reg, NULL);

static ssize_t show_vrm_reg(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	struct lm85_data *data = dev_get_drvdata(dev);
	return sprintf(buf, "%ld\n", (long) data->vrm);
}

static ssize_t store_vrm_reg(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count)
{
	struct lm85_data *data = dev_get_drvdata(dev);
	data->vrm = simple_strtoul(buf, NULL, 10);
	return count;
}

static DEVICE_ATTR(vrm, S_IRUGO | S_IWUSR, show_vrm_reg, store_vrm_reg);

static ssize_t show_alarms_reg(struct device *dev, struct device_attribute
		*attr, char *buf)
{
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%u\n", data->alarms);
}

static DEVICE_ATTR(alarms, S_IRUGO, show_alarms_reg, NULL);

static ssize_t show_alarm(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%u\n", (data->alarms >> nr) & 1);
}

static SENSOR_DEVICE_ATTR(in0_alarm, S_IRUGO, show_alarm, NULL, 0);
static SENSOR_DEVICE_ATTR(in1_alarm, S_IRUGO, show_alarm, NULL, 1);
static SENSOR_DEVICE_ATTR(in2_alarm, S_IRUGO, show_alarm, NULL, 2);
static SENSOR_DEVICE_ATTR(in3_alarm, S_IRUGO, show_alarm, NULL, 3);
static SENSOR_DEVICE_ATTR(in4_alarm, S_IRUGO, show_alarm, NULL, 8);
static SENSOR_DEVICE_ATTR(in5_alarm, S_IRUGO, show_alarm, NULL, 18);
static SENSOR_DEVICE_ATTR(in6_alarm, S_IRUGO, show_alarm, NULL, 16);
static SENSOR_DEVICE_ATTR(in7_alarm, S_IRUGO, show_alarm, NULL, 17);
static SENSOR_DEVICE_ATTR(temp1_alarm, S_IRUGO, show_alarm, NULL, 4);
static SENSOR_DEVICE_ATTR(temp1_fault, S_IRUGO, show_alarm, NULL, 14);
static SENSOR_DEVICE_ATTR(temp2_alarm, S_IRUGO, show_alarm, NULL, 5);
static SENSOR_DEVICE_ATTR(temp3_alarm, S_IRUGO, show_alarm, NULL, 6);
static SENSOR_DEVICE_ATTR(temp3_fault, S_IRUGO, show_alarm, NULL, 15);
static SENSOR_DEVICE_ATTR(fan1_alarm, S_IRUGO, show_alarm, NULL, 10);
static SENSOR_DEVICE_ATTR(fan2_alarm, S_IRUGO, show_alarm, NULL, 11);
static SENSOR_DEVICE_ATTR(fan3_alarm, S_IRUGO, show_alarm, NULL, 12);
static SENSOR_DEVICE_ATTR(fan4_alarm, S_IRUGO, show_alarm, NULL, 13);

/* pwm */

static ssize_t show_pwm(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", PWM_FROM_REG(data->pwm[nr]));
}

static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	data->pwm[nr] = PWM_TO_REG(val);
	lm85_write_value(client, LM85_REG_PWM(nr), data->pwm[nr]);
	mutex_unlock(&data->update_lock);
	return count;
}

static ssize_t show_pwm_enable(struct device *dev, struct device_attribute
		*attr, char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	int pwm_zone, enable;

	pwm_zone = ZONE_FROM_REG(data->autofan[nr].config);
	switch (pwm_zone) {
	case -1:	/* PWM is always at 100% */
		enable = 0;
		break;
	case 0:		/* PWM is always at 0% */
	case -2:	/* PWM responds to manual control */
		enable = 1;
		break;
	default:	/* PWM in automatic mode */
		enable = 2;
	}
	return sprintf(buf, "%d\n", enable);
}

static ssize_t set_pwm_enable(struct device *dev, struct device_attribute
		*attr, const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);
	u8 config;

	switch (val) {
	case 0:
		config = 3;
		break;
	case 1:
		config = 7;
		break;
	case 2:
		/* Here we have to choose arbitrarily one of the 5 possible
		   configurations; I go for the safest */
		config = 6;
		break;
	default:
		return -EINVAL;
	}

	mutex_lock(&data->update_lock);
	data->autofan[nr].config = lm85_read_value(client,
		LM85_REG_AFAN_CONFIG(nr));
	data->autofan[nr].config = (data->autofan[nr].config & ~0xe0)
		| (config << 5);
	lm85_write_value(client, LM85_REG_AFAN_CONFIG(nr),
		data->autofan[nr].config);
	mutex_unlock(&data->update_lock);
	return count;
}

static ssize_t show_pwm_freq(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	int freq;

	if (IS_ADT7468_HFPWM(data))
		freq = 22500;
	else
		freq = FREQ_FROM_REG(data->freq_map, data->pwm_freq[nr]);

	return sprintf(buf, "%d\n", freq);
}

static ssize_t set_pwm_freq(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	/* The ADT7468 has a special high-frequency PWM output mode,
	 * where all PWM outputs are driven by a 22.5 kHz clock.
	 * This might confuse the user, but there's not much we can do. */
	if (data->type == adt7468 && val >= 11300) {	/* High freq. mode */
		data->cfg5 &= ~ADT7468_HFPWM;
		lm85_write_value(client, ADT7468_REG_CFG5, data->cfg5);
	} else {					/* Low freq. mode */
		data->pwm_freq[nr] = FREQ_TO_REG(data->freq_map, val);
		lm85_write_value(client, LM85_REG_AFAN_RANGE(nr),
				 (data->zone[nr].range << 4)
				 | data->pwm_freq[nr]);
		if (data->type == adt7468) {
			data->cfg5 |= ADT7468_HFPWM;
			lm85_write_value(client, ADT7468_REG_CFG5, data->cfg5);
		}
	}
	mutex_unlock(&data->update_lock);
	return count;
}

#define show_pwm_reg(offset)						\
static SENSOR_DEVICE_ATTR(pwm##offset, S_IRUGO | S_IWUSR,		\
		show_pwm, set_pwm, offset - 1);				\
static SENSOR_DEVICE_ATTR(pwm##offset##_enable, S_IRUGO | S_IWUSR,	\
		show_pwm_enable, set_pwm_enable, offset - 1);		\
static SENSOR_DEVICE_ATTR(pwm##offset##_freq, S_IRUGO | S_IWUSR,	\
		show_pwm_freq, set_pwm_freq, offset - 1)

show_pwm_reg(1);
show_pwm_reg(2);
show_pwm_reg(3);

/* Voltages */

static ssize_t show_in(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", INSEXT_FROM_REG(nr, data->in[nr],
						    data->in_ext[nr]));
}

static ssize_t show_in_min(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", INS_FROM_REG(nr, data->in_min[nr]));
}

static ssize_t set_in_min(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	data->in_min[nr] = INS_TO_REG(nr, val);
	lm85_write_value(client, LM85_REG_IN_MIN(nr), data->in_min[nr]);
	mutex_unlock(&data->update_lock);
	return count;
}

static ssize_t show_in_max(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", INS_FROM_REG(nr, data->in_max[nr]));
}

static ssize_t set_in_max(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	data->in_max[nr] = INS_TO_REG(nr, val);
	lm85_write_value(client, LM85_REG_IN_MAX(nr), data->in_max[nr]);
	mutex_unlock(&data->update_lock);
	return count;
}

#define show_in_reg(offset)						\
static SENSOR_DEVICE_ATTR(in##offset##_input, S_IRUGO,			\
		show_in, NULL, offset);					\
static SENSOR_DEVICE_ATTR(in##offset##_min, S_IRUGO | S_IWUSR,		\
		show_in_min, set_in_min, offset);			\
static SENSOR_DEVICE_ATTR(in##offset##_max, S_IRUGO | S_IWUSR,		\
		show_in_max, set_in_max, offset)

show_in_reg(0);
show_in_reg(1);
show_in_reg(2);
show_in_reg(3);
show_in_reg(4);
show_in_reg(5);
show_in_reg(6);
show_in_reg(7);

/* Temps */

static ssize_t show_temp(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", TEMPEXT_FROM_REG(data->temp[nr],
						     data->temp_ext[nr]));
}

static ssize_t show_temp_min(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_min[nr]));
}

static ssize_t set_temp_min(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);

	if (IS_ADT7468_OFF64(data))
		val += 64;

	mutex_lock(&data->update_lock);
	data->temp_min[nr] = TEMP_TO_REG(val);
	lm85_write_value(client, LM85_REG_TEMP_MIN(nr), data->temp_min[nr]);
	mutex_unlock(&data->update_lock);
	return count;
}

static ssize_t show_temp_max(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_max[nr]));
}

static ssize_t set_temp_max(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);

	if (IS_ADT7468_OFF64(data))
		val += 64;

	mutex_lock(&data->update_lock);
	data->temp_max[nr] = TEMP_TO_REG(val);
	lm85_write_value(client, LM85_REG_TEMP_MAX(nr), data->temp_max[nr]);
	mutex_unlock(&data->update_lock);
	return count;
}

#define show_temp_reg(offset)						\
static SENSOR_DEVICE_ATTR(temp##offset##_input, S_IRUGO,		\
		show_temp, NULL, offset - 1);				\
static SENSOR_DEVICE_ATTR(temp##offset##_min, S_IRUGO | S_IWUSR,	\
		show_temp_min, set_temp_min, offset - 1);		\
static SENSOR_DEVICE_ATTR(temp##offset##_max, S_IRUGO | S_IWUSR,	\
		show_temp_max, set_temp_max, offset - 1);

show_temp_reg(1);
show_temp_reg(2);
show_temp_reg(3);


/* Automatic PWM control */

static ssize_t show_pwm_auto_channels(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", ZONE_FROM_REG(data->autofan[nr].config));
}

static ssize_t set_pwm_auto_channels(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	data->autofan[nr].config = (data->autofan[nr].config & (~0xe0))
		| ZONE_TO_REG(val);
	lm85_write_value(client, LM85_REG_AFAN_CONFIG(nr),
		data->autofan[nr].config);
	mutex_unlock(&data->update_lock);
	return count;
}

static ssize_t show_pwm_auto_pwm_min(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", PWM_FROM_REG(data->autofan[nr].min_pwm));
}

static ssize_t set_pwm_auto_pwm_min(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	data->autofan[nr].min_pwm = PWM_TO_REG(val);
	lm85_write_value(client, LM85_REG_AFAN_MINPWM(nr),
		data->autofan[nr].min_pwm);
	mutex_unlock(&data->update_lock);
	return count;
}

static ssize_t show_pwm_auto_pwm_minctl(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", data->autofan[nr].min_off);
}

static ssize_t set_pwm_auto_pwm_minctl(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);
	u8 tmp;

	mutex_lock(&data->update_lock);
	data->autofan[nr].min_off = val;
	tmp = lm85_read_value(client, LM85_REG_AFAN_SPIKE1);
	tmp &= ~(0x20 << nr);
	if (data->autofan[nr].min_off)
		tmp |= 0x20 << nr;
	lm85_write_value(client, LM85_REG_AFAN_SPIKE1, tmp);
	mutex_unlock(&data->update_lock);
	return count;
}

#define pwm_auto(offset)						\
static SENSOR_DEVICE_ATTR(pwm##offset##_auto_channels,			\
		S_IRUGO | S_IWUSR, show_pwm_auto_channels,		\
		set_pwm_auto_channels, offset - 1);			\
static SENSOR_DEVICE_ATTR(pwm##offset##_auto_pwm_min,			\
		S_IRUGO | S_IWUSR, show_pwm_auto_pwm_min,		\
		set_pwm_auto_pwm_min, offset - 1);			\
static SENSOR_DEVICE_ATTR(pwm##offset##_auto_pwm_minctl,		\
		S_IRUGO | S_IWUSR, show_pwm_auto_pwm_minctl,		\
		set_pwm_auto_pwm_minctl, offset - 1)

pwm_auto(1);
pwm_auto(2);
pwm_auto(3);

/* Temperature settings for automatic PWM control */

static ssize_t show_temp_auto_temp_off(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->zone[nr].limit) -
		HYST_FROM_REG(data->zone[nr].hyst));
}

static ssize_t set_temp_auto_temp_off(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	int min;
	long val = simple_strtol(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	min = TEMP_FROM_REG(data->zone[nr].limit);
	data->zone[nr].hyst = HYST_TO_REG(min - val);
	if (nr == 0 || nr == 1) {
		lm85_write_value(client, LM85_REG_AFAN_HYST1,
			(data->zone[0].hyst << 4)
			| data->zone[1].hyst);
	} else {
		lm85_write_value(client, LM85_REG_AFAN_HYST2,
			(data->zone[2].hyst << 4));
	}
	mutex_unlock(&data->update_lock);
	return count;
}

static ssize_t show_temp_auto_temp_min(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->zone[nr].limit));
}

static ssize_t set_temp_auto_temp_min(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	data->zone[nr].limit = TEMP_TO_REG(val);
	lm85_write_value(client, LM85_REG_AFAN_LIMIT(nr),
		data->zone[nr].limit);

/* Update temp_auto_max and temp_auto_range */
	data->zone[nr].range = RANGE_TO_REG(
		TEMP_FROM_REG(data->zone[nr].max_desired) -
		TEMP_FROM_REG(data->zone[nr].limit));
	lm85_write_value(client, LM85_REG_AFAN_RANGE(nr),
		((data->zone[nr].range & 0x0f) << 4)
		| (data->pwm_freq[nr] & 0x07));

	mutex_unlock(&data->update_lock);
	return count;
}

static ssize_t show_temp_auto_temp_max(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->zone[nr].limit) +
		RANGE_FROM_REG(data->zone[nr].range));
}

static ssize_t set_temp_auto_temp_max(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	int min;
	long val = simple_strtol(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	min = TEMP_FROM_REG(data->zone[nr].limit);
	data->zone[nr].max_desired = TEMP_TO_REG(val);
	data->zone[nr].range = RANGE_TO_REG(
		val - min);
	lm85_write_value(client, LM85_REG_AFAN_RANGE(nr),
		((data->zone[nr].range & 0x0f) << 4)
		| (data->pwm_freq[nr] & 0x07));
	mutex_unlock(&data->update_lock);
	return count;
}

static ssize_t show_temp_auto_temp_crit(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct lm85_data *data = lm85_update_device(dev);
	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->zone[nr].critical));
}

static ssize_t set_temp_auto_temp_crit(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	int nr = to_sensor_dev_attr(attr)->index;
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	long val = simple_strtol(buf, NULL, 10);

	mutex_lock(&data->update_lock);
	data->zone[nr].critical = TEMP_TO_REG(val);
	lm85_write_value(client, LM85_REG_AFAN_CRITICAL(nr),
		data->zone[nr].critical);
	mutex_unlock(&data->update_lock);
	return count;
}

#define temp_auto(offset)						\
static SENSOR_DEVICE_ATTR(temp##offset##_auto_temp_off,			\
		S_IRUGO | S_IWUSR, show_temp_auto_temp_off,		\
		set_temp_auto_temp_off, offset - 1);			\
static SENSOR_DEVICE_ATTR(temp##offset##_auto_temp_min,			\
		S_IRUGO | S_IWUSR, show_temp_auto_temp_min,		\
		set_temp_auto_temp_min, offset - 1);			\
static SENSOR_DEVICE_ATTR(temp##offset##_auto_temp_max,			\
		S_IRUGO | S_IWUSR, show_temp_auto_temp_max,		\
		set_temp_auto_temp_max, offset - 1);			\
static SENSOR_DEVICE_ATTR(temp##offset##_auto_temp_crit,		\
		S_IRUGO | S_IWUSR, show_temp_auto_temp_crit,		\
		set_temp_auto_temp_crit, offset - 1);

temp_auto(1);
temp_auto(2);
temp_auto(3);

static struct attribute *lm85_attributes[] = {
	&sensor_dev_attr_fan1_input.dev_attr.attr,
	&sensor_dev_attr_fan2_input.dev_attr.attr,
	&sensor_dev_attr_fan3_input.dev_attr.attr,
	&sensor_dev_attr_fan4_input.dev_attr.attr,
	&sensor_dev_attr_fan1_min.dev_attr.attr,
	&sensor_dev_attr_fan2_min.dev_attr.attr,
	&sensor_dev_attr_fan3_min.dev_attr.attr,
	&sensor_dev_attr_fan4_min.dev_attr.attr,
	&sensor_dev_attr_fan1_alarm.dev_attr.attr,
	&sensor_dev_attr_fan2_alarm.dev_attr.attr,
	&sensor_dev_attr_fan3_alarm.dev_attr.attr,
	&sensor_dev_attr_fan4_alarm.dev_attr.attr,

	&sensor_dev_attr_pwm1.dev_attr.attr,
	&sensor_dev_attr_pwm2.dev_attr.attr,
	&sensor_dev_attr_pwm3.dev_attr.attr,
	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
	&sensor_dev_attr_pwm2_enable.dev_attr.attr,
	&sensor_dev_attr_pwm3_enable.dev_attr.attr,
	&sensor_dev_attr_pwm1_freq.dev_attr.attr,
	&sensor_dev_attr_pwm2_freq.dev_attr.attr,
	&sensor_dev_attr_pwm3_freq.dev_attr.attr,

	&sensor_dev_attr_in0_input.dev_attr.attr,
	&sensor_dev_attr_in1_input.dev_attr.attr,
	&sensor_dev_attr_in2_input.dev_attr.attr,
	&sensor_dev_attr_in3_input.dev_attr.attr,
	&sensor_dev_attr_in0_min.dev_attr.attr,
	&sensor_dev_attr_in1_min.dev_attr.attr,
	&sensor_dev_attr_in2_min.dev_attr.attr,
	&sensor_dev_attr_in3_min.dev_attr.attr,
	&sensor_dev_attr_in0_max.dev_attr.attr,
	&sensor_dev_attr_in1_max.dev_attr.attr,
	&sensor_dev_attr_in2_max.dev_attr.attr,
	&sensor_dev_attr_in3_max.dev_attr.attr,
	&sensor_dev_attr_in0_alarm.dev_attr.attr,
	&sensor_dev_attr_in1_alarm.dev_attr.attr,
	&sensor_dev_attr_in2_alarm.dev_attr.attr,
	&sensor_dev_attr_in3_alarm.dev_attr.attr,

	&sensor_dev_attr_temp1_input.dev_attr.attr,
	&sensor_dev_attr_temp2_input.dev_attr.attr,
	&sensor_dev_attr_temp3_input.dev_attr.attr,
	&sensor_dev_attr_temp1_min.dev_attr.attr,
	&sensor_dev_attr_temp2_min.dev_attr.attr,
	&sensor_dev_attr_temp3_min.dev_attr.attr,
	&sensor_dev_attr_temp1_max.dev_attr.attr,
	&sensor_dev_attr_temp2_max.dev_attr.attr,
	&sensor_dev_attr_temp3_max.dev_attr.attr,
	&sensor_dev_attr_temp1_alarm.dev_attr.attr,
	&sensor_dev_attr_temp2_alarm.dev_attr.attr,
	&sensor_dev_attr_temp3_alarm.dev_attr.attr,
	&sensor_dev_attr_temp1_fault.dev_attr.attr,
	&sensor_dev_attr_temp3_fault.dev_attr.attr,

	&sensor_dev_attr_pwm1_auto_channels.dev_attr.attr,
	&sensor_dev_attr_pwm2_auto_channels.dev_attr.attr,
	&sensor_dev_attr_pwm3_auto_channels.dev_attr.attr,
	&sensor_dev_attr_pwm1_auto_pwm_min.dev_attr.attr,
	&sensor_dev_attr_pwm2_auto_pwm_min.dev_attr.attr,
	&sensor_dev_attr_pwm3_auto_pwm_min.dev_attr.attr,

	&sensor_dev_attr_temp1_auto_temp_min.dev_attr.attr,
	&sensor_dev_attr_temp2_auto_temp_min.dev_attr.attr,
	&sensor_dev_attr_temp3_auto_temp_min.dev_attr.attr,
	&sensor_dev_attr_temp1_auto_temp_max.dev_attr.attr,
	&sensor_dev_attr_temp2_auto_temp_max.dev_attr.attr,
	&sensor_dev_attr_temp3_auto_temp_max.dev_attr.attr,
	&sensor_dev_attr_temp1_auto_temp_crit.dev_attr.attr,
	&sensor_dev_attr_temp2_auto_temp_crit.dev_attr.attr,
	&sensor_dev_attr_temp3_auto_temp_crit.dev_attr.attr,

	&dev_attr_vrm.attr,
	&dev_attr_cpu0_vid.attr,
	&dev_attr_alarms.attr,
	NULL
};

static const struct attribute_group lm85_group = {
	.attrs = lm85_attributes,
};

static struct attribute *lm85_attributes_minctl[] = {
	&sensor_dev_attr_pwm1_auto_pwm_minctl.dev_attr.attr,
	&sensor_dev_attr_pwm2_auto_pwm_minctl.dev_attr.attr,
	&sensor_dev_attr_pwm3_auto_pwm_minctl.dev_attr.attr,
};

static const struct attribute_group lm85_group_minctl = {
	.attrs = lm85_attributes_minctl,
};

static struct attribute *lm85_attributes_temp_off[] = {
	&sensor_dev_attr_temp1_auto_temp_off.dev_attr.attr,
	&sensor_dev_attr_temp2_auto_temp_off.dev_attr.attr,
	&sensor_dev_attr_temp3_auto_temp_off.dev_attr.attr,
};

static const struct attribute_group lm85_group_temp_off = {
	.attrs = lm85_attributes_temp_off,
};

static struct attribute *lm85_attributes_in4[] = {
	&sensor_dev_attr_in4_input.dev_attr.attr,
	&sensor_dev_attr_in4_min.dev_attr.attr,
	&sensor_dev_attr_in4_max.dev_attr.attr,
	&sensor_dev_attr_in4_alarm.dev_attr.attr,
	NULL
};

static const struct attribute_group lm85_group_in4 = {
	.attrs = lm85_attributes_in4,
};

static struct attribute *lm85_attributes_in567[] = {
	&sensor_dev_attr_in5_input.dev_attr.attr,
	&sensor_dev_attr_in6_input.dev_attr.attr,
	&sensor_dev_attr_in7_input.dev_attr.attr,
	&sensor_dev_attr_in5_min.dev_attr.attr,
	&sensor_dev_attr_in6_min.dev_attr.attr,
	&sensor_dev_attr_in7_min.dev_attr.attr,
	&sensor_dev_attr_in5_max.dev_attr.attr,
	&sensor_dev_attr_in6_max.dev_attr.attr,
	&sensor_dev_attr_in7_max.dev_attr.attr,
	&sensor_dev_attr_in5_alarm.dev_attr.attr,
	&sensor_dev_attr_in6_alarm.dev_attr.attr,
	&sensor_dev_attr_in7_alarm.dev_attr.attr,
	NULL
};

static const struct attribute_group lm85_group_in567 = {
	.attrs = lm85_attributes_in567,
};

static void lm85_init_client(struct i2c_client *client)
{
	int value;

	/* Start monitoring if needed */
	value = lm85_read_value(client, LM85_REG_CONFIG);
	if (!(value & 0x01)) {
		dev_info(&client->dev, "Starting monitoring\n");
		lm85_write_value(client, LM85_REG_CONFIG, value | 0x01);
	}

	/* Warn about unusual configuration bits */
	if (value & 0x02)
		dev_warn(&client->dev, "Device configuration is locked\n");
	if (!(value & 0x04))
		dev_warn(&client->dev, "Device is not ready\n");
}

static int lm85_is_fake(struct i2c_client *client)
{
	/*
	 * Differenciate between real LM96000 and Winbond WPCD377I. The latter
	 * emulate the former except that it has no hardware monitoring function
	 * so the readings are always 0.
	 */
	int i;
	u8 in_temp, fan;

	for (i = 0; i < 8; i++) {
		in_temp = i2c_smbus_read_byte_data(client, 0x20 + i);
		fan = i2c_smbus_read_byte_data(client, 0x28 + i);
		if (in_temp != 0x00 || fan != 0xff)
			return 0;
	}

	return 1;
}

/* Return 0 if detection is successful, -ENODEV otherwise */
static int lm85_detect(struct i2c_client *client, struct i2c_board_info *info)
{
	struct i2c_adapter *adapter = client->adapter;
	int address = client->addr;
	const char *type_name;
	int company, verstep;

	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) {
		/* We need to be able to do byte I/O */
		return -ENODEV;
	}

	/* Determine the chip type */
	company = lm85_read_value(client, LM85_REG_COMPANY);
	verstep = lm85_read_value(client, LM85_REG_VERSTEP);

	dev_dbg(&adapter->dev, "Detecting device at 0x%02x with "
		"COMPANY: 0x%02x and VERSTEP: 0x%02x\n",
		address, company, verstep);

	/* All supported chips have the version in common */
	if ((verstep & LM85_VERSTEP_VMASK) != LM85_VERSTEP_GENERIC &&
	    (verstep & LM85_VERSTEP_VMASK) != LM85_VERSTEP_GENERIC2) {
		dev_dbg(&adapter->dev,
			"Autodetection failed: unsupported version\n");
		return -ENODEV;
	}
	type_name = "lm85";

	/* Now, refine the detection */
	if (company == LM85_COMPANY_NATIONAL) {
		switch (verstep) {
		case LM85_VERSTEP_LM85C:
			type_name = "lm85c";
			break;
		case LM85_VERSTEP_LM85B:
			type_name = "lm85b";
			break;
		case LM85_VERSTEP_LM96000_1:
		case LM85_VERSTEP_LM96000_2:
			/* Check for Winbond WPCD377I */
			if (lm85_is_fake(client)) {
				dev_dbg(&adapter->dev,
					"Found Winbond WPCD377I, ignoring\n");
				return -ENODEV;
			}
			break;
		}
	} else if (company == LM85_COMPANY_ANALOG_DEV) {
		switch (verstep) {
		case LM85_VERSTEP_ADM1027:
			type_name = "adm1027";
			break;
		case LM85_VERSTEP_ADT7463:
		case LM85_VERSTEP_ADT7463C:
			type_name = "adt7463";
			break;
		case LM85_VERSTEP_ADT7468_1:
		case LM85_VERSTEP_ADT7468_2:
			type_name = "adt7468";
			break;
		}
	} else if (company == LM85_COMPANY_SMSC) {
		switch (verstep) {
		case LM85_VERSTEP_EMC6D100_A0:
		case LM85_VERSTEP_EMC6D100_A1:
			/* Note: we can't tell a '100 from a '101 */
			type_name = "emc6d100";
			break;
		case LM85_VERSTEP_EMC6D102:
			type_name = "emc6d102";
			break;
		case LM85_VERSTEP_EMC6D103_A0:
		case LM85_VERSTEP_EMC6D103_A1:
			type_name = "emc6d103";
			break;
		case LM85_VERSTEP_EMC6D103S:
			type_name = "emc6d103s";
			break;
		}
	} else {
		dev_dbg(&adapter->dev,
			"Autodetection failed: unknown vendor\n");
		return -ENODEV;
	}

	strlcpy(info->type, type_name, I2C_NAME_SIZE);

	return 0;
}

static void lm85_remove_files(struct i2c_client *client, struct lm85_data *data)
{
	sysfs_remove_group(&client->dev.kobj, &lm85_group);
	if (data->type != emc6d103s) {
		sysfs_remove_group(&client->dev.kobj, &lm85_group_minctl);
		sysfs_remove_group(&client->dev.kobj, &lm85_group_temp_off);
	}
	if (!data->has_vid5)
		sysfs_remove_group(&client->dev.kobj, &lm85_group_in4);
	if (data->type == emc6d100)
		sysfs_remove_group(&client->dev.kobj, &lm85_group_in567);
}

static int lm85_probe(struct i2c_client *client,
		      const struct i2c_device_id *id)
{
	struct lm85_data *data;
	int err;

	data = kzalloc(sizeof(struct lm85_data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	i2c_set_clientdata(client, data);
	data->type = id->driver_data;
	mutex_init(&data->update_lock);

	/* Fill in the chip specific driver values */
	switch (data->type) {
	case adm1027:
	case adt7463:
	case adt7468:
	case emc6d100:
	case emc6d102:
	case emc6d103:
	case emc6d103s:
		data->freq_map = adm1027_freq_map;
		break;
	default:
		data->freq_map = lm85_freq_map;
	}

	/* Set the VRM version */
	data->vrm = vid_which_vrm();

	/* Initialize the LM85 chip */
	lm85_init_client(client);

	/* Register sysfs hooks */
	err = sysfs_create_group(&client->dev.kobj, &lm85_group);
	if (err)
		goto err_kfree;

	/* minctl and temp_off exist on all chips except emc6d103s */
	if (data->type != emc6d103s) {
		err = sysfs_create_group(&client->dev.kobj, &lm85_group_minctl);
		if (err)
			goto err_kfree;
		err = sysfs_create_group(&client->dev.kobj,
					 &lm85_group_temp_off);
		if (err)
			goto err_kfree;
	}

	/* The ADT7463/68 have an optional VRM 10 mode where pin 21 is used
	   as a sixth digital VID input rather than an analog input. */
	if (data->type == adt7463 || data->type == adt7468) {
		u8 vid = lm85_read_value(client, LM85_REG_VID);
		if (vid & 0x80)
			data->has_vid5 = true;
	}

	if (!data->has_vid5)
		if ((err = sysfs_create_group(&client->dev.kobj,
					&lm85_group_in4)))
			goto err_remove_files;

	/* The EMC6D100 has 3 additional voltage inputs */
	if (data->type == emc6d100)
		if ((err = sysfs_create_group(&client->dev.kobj,
					&lm85_group_in567)))
			goto err_remove_files;

	data->hwmon_dev = hwmon_device_register(&client->dev);
	if (IS_ERR(data->hwmon_dev)) {
		err = PTR_ERR(data->hwmon_dev);
		goto err_remove_files;
	}

	return 0;

	/* Error out and cleanup code */
 err_remove_files:
	lm85_remove_files(client, data);
 err_kfree:
	kfree(data);
	return err;
}

static int lm85_remove(struct i2c_client *client)
{
	struct lm85_data *data = i2c_get_clientdata(client);
	hwmon_device_unregister(data->hwmon_dev);
	lm85_remove_files(client, data);
	kfree(data);
	return 0;
}


static int lm85_read_value(struct i2c_client *client, u8 reg)
{
	int res;

	/* What size location is it? */
	switch (reg) {
	case LM85_REG_FAN(0):  /* Read WORD data */
	case LM85_REG_FAN(1):
	case LM85_REG_FAN(2):
	case LM85_REG_FAN(3):
	case LM85_REG_FAN_MIN(0):
	case LM85_REG_FAN_MIN(1):
	case LM85_REG_FAN_MIN(2):
	case LM85_REG_FAN_MIN(3):
	case LM85_REG_ALARM1:	/* Read both bytes at once */
		res = i2c_smbus_read_byte_data(client, reg) & 0xff;
		res |= i2c_smbus_read_byte_data(client, reg + 1) << 8;
		break;
	default:	/* Read BYTE data */
		res = i2c_smbus_read_byte_data(client, reg);
		break;
	}

	return res;
}

static void lm85_write_value(struct i2c_client *client, u8 reg, int value)
{
	switch (reg) {
	case LM85_REG_FAN(0):  /* Write WORD data */
	case LM85_REG_FAN(1):
	case LM85_REG_FAN(2):
	case LM85_REG_FAN(3):
	case LM85_REG_FAN_MIN(0):
	case LM85_REG_FAN_MIN(1):
	case LM85_REG_FAN_MIN(2):
	case LM85_REG_FAN_MIN(3):
	/* NOTE: ALARM is read only, so not included here */
		i2c_smbus_write_byte_data(client, reg, value & 0xff);
		i2c_smbus_write_byte_data(client, reg + 1, value >> 8);
		break;
	default:	/* Write BYTE data */
		i2c_smbus_write_byte_data(client, reg, value);
		break;
	}
}

static struct lm85_data *lm85_update_device(struct device *dev)
{
	struct i2c_client *client = to_i2c_client(dev);
	struct lm85_data *data = i2c_get_clientdata(client);
	int i;

	mutex_lock(&data->update_lock);

	if (!data->valid ||
	     time_after(jiffies, data->last_reading + LM85_DATA_INTERVAL)) {
		/* Things that change quickly */
		dev_dbg(&client->dev, "Reading sensor values\n");

		/* Have to read extended bits first to "freeze" the
		 * more significant bits that are read later.
		 * There are 2 additional resolution bits per channel and we
		 * have room for 4, so we shift them to the left.
		 */
		if (data->type == adm1027 || data->type == adt7463 ||
		    data->type == adt7468) {
			int ext1 = lm85_read_value(client,
						   ADM1027_REG_EXTEND_ADC1);
			int ext2 =  lm85_read_value(client,
						    ADM1027_REG_EXTEND_ADC2);
			int val = (ext1 << 8) + ext2;

			for (i = 0; i <= 4; i++)
				data->in_ext[i] =
					((val >> (i * 2)) & 0x03) << 2;

			for (i = 0; i <= 2; i++)
				data->temp_ext[i] =
					(val >> ((i + 4) * 2)) & 0x0c;
		}

		data->vid = lm85_read_value(client, LM85_REG_VID);

		for (i = 0; i <= 3; ++i) {
			data->in[i] =
			    lm85_read_value(client, LM85_REG_IN(i));
			data->fan[i] =
			    lm85_read_value(client, LM85_REG_FAN(i));
		}

		if (!data->has_vid5)
			data->in[4] = lm85_read_value(client, LM85_REG_IN(4));

		if (data->type == adt7468)
			data->cfg5 = lm85_read_value(client, ADT7468_REG_CFG5);

		for (i = 0; i <= 2; ++i) {
			data->temp[i] =
			    lm85_read_value(client, LM85_REG_TEMP(i));
			data->pwm[i] =
			    lm85_read_value(client, LM85_REG_PWM(i));

			if (IS_ADT7468_OFF64(data))
				data->temp[i] -= 64;
		}

		data->alarms = lm85_read_value(client, LM85_REG_ALARM1);

		if (data->type == emc6d100) {
			/* Three more voltage sensors */
			for (i = 5; i <= 7; ++i) {
				data->in[i] = lm85_read_value(client,
							EMC6D100_REG_IN(i));
			}
			/* More alarm bits */
			data->alarms |= lm85_read_value(client,
						EMC6D100_REG_ALARM3) << 16;
		} else if (data->type == emc6d102 || data->type == emc6d103 ||
			   data->type == emc6d103s) {
			/* Have to read LSB bits after the MSB ones because
			   the reading of the MSB bits has frozen the
			   LSBs (backward from the ADM1027).
			 */
			int ext1 = lm85_read_value(client,
						   EMC6D102_REG_EXTEND_ADC1);
			int ext2 = lm85_read_value(client,
						   EMC6D102_REG_EXTEND_ADC2);
			int ext3 = lm85_read_value(client,
						   EMC6D102_REG_EXTEND_ADC3);
			int ext4 = lm85_read_value(client,
						   EMC6D102_REG_EXTEND_ADC4);
			data->in_ext[0] = ext3 & 0x0f;
			data->in_ext[1] = ext4 & 0x0f;
			data->in_ext[2] = ext4 >> 4;
			data->in_ext[3] = ext3 >> 4;
			data->in_ext[4] = ext2 >> 4;

			data->temp_ext[0] = ext1 & 0x0f;
			data->temp_ext[1] = ext2 & 0x0f;
			data->temp_ext[2] = ext1 >> 4;
		}

		data->last_reading = jiffies;
	}  /* last_reading */

	if (!data->valid ||
	     time_after(jiffies, data->last_config + LM85_CONFIG_INTERVAL)) {
		/* Things that don't change often */
		dev_dbg(&client->dev, "Reading config values\n");

		for (i = 0; i <= 3; ++i) {
			data->in_min[i] =
			    lm85_read_value(client, LM85_REG_IN_MIN(i));
			data->in_max[i] =
			    lm85_read_value(client, LM85_REG_IN_MAX(i));
			data->fan_min[i] =
			    lm85_read_value(client, LM85_REG_FAN_MIN(i));
		}

		if (!data->has_vid5)  {
			data->in_min[4] = lm85_read_value(client,
					  LM85_REG_IN_MIN(4));
			data->in_max[4] = lm85_read_value(client,
					  LM85_REG_IN_MAX(4));
		}

		if (data->type == emc6d100) {
			for (i = 5; i <= 7; ++i) {
				data->in_min[i] = lm85_read_value(client,
						EMC6D100_REG_IN_MIN(i));
				data->in_max[i] = lm85_read_value(client,
						EMC6D100_REG_IN_MAX(i));
			}
		}

		for (i = 0; i <= 2; ++i) {
			int val;

			data->temp_min[i] =
			    lm85_read_value(client, LM85_REG_TEMP_MIN(i));
			data->temp_max[i] =
			    lm85_read_value(client, LM85_REG_TEMP_MAX(i));

			data->autofan[i].config =
			    lm85_read_value(client, LM85_REG_AFAN_CONFIG(i));
			val = lm85_read_value(client, LM85_REG_AFAN_RANGE(i));
			data->pwm_freq[i] = val & 0x07;
			data->zone[i].range = val >> 4;
			data->autofan[i].min_pwm =
			    lm85_read_value(client, LM85_REG_AFAN_MINPWM(i));
			data->zone[i].limit =
			    lm85_read_value(client, LM85_REG_AFAN_LIMIT(i));
			data->zone[i].critical =
			    lm85_read_value(client, LM85_REG_AFAN_CRITICAL(i));

			if (IS_ADT7468_OFF64(data)) {
				data->temp_min[i] -= 64;
				data->temp_max[i] -= 64;
				data->zone[i].limit -= 64;
				data->zone[i].critical -= 64;
			}
		}

		if (data->type != emc6d103s) {
			i = lm85_read_value(client, LM85_REG_AFAN_SPIKE1);
			data->autofan[0].min_off = (i & 0x20) != 0;
			data->autofan[1].min_off = (i & 0x40) != 0;
			data->autofan[2].min_off = (i & 0x80) != 0;

			i = lm85_read_value(client, LM85_REG_AFAN_HYST1);
			data->zone[0].hyst = i >> 4;
			data->zone[1].hyst = i & 0x0f;

			i = lm85_read_value(client, LM85_REG_AFAN_HYST2);
			data->zone[2].hyst = i >> 4;
		}

		data->last_config = jiffies;
	}  /* last_config */

	data->valid = 1;

	mutex_unlock(&data->update_lock);

	return data;
}


static int __init sm_lm85_init(void)
{
	return i2c_add_driver(&lm85_driver);
}

static void __exit sm_lm85_exit(void)
{
	i2c_del_driver(&lm85_driver);
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Philip Pokorny <ppokorny@penguincomputing.com>, "
	"Margit Schubert-While <margitsw@t-online.de>, "
	"Justin Thiessen <jthiessen@penguincomputing.com>");
MODULE_DESCRIPTION("LM85-B, LM85-C driver");

module_init(sm_lm85_init);
module_exit(sm_lm85_exit);
an class="hl opt">, fi, extent_end - end); btrfs_set_file_extent_offset(leaf, fi, end - orig_offset); fi = btrfs_item_ptr(leaf, path->slots[0] - 1, struct btrfs_file_extent_item); btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_set_file_extent_num_bytes(leaf, fi, end - other_start); btrfs_mark_buffer_dirty(leaf); goto out; } } if (start > key.offset && end == extent_end) { other_start = end; other_end = 0; if (extent_mergeable(leaf, path->slots[0] + 1, ino, bytenr, orig_offset, &other_start, &other_end)) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_num_bytes(leaf, fi, start - key.offset); btrfs_set_file_extent_generation(leaf, fi, trans->transid); path->slots[0]++; new_key.offset = start; btrfs_set_item_key_safe(fs_info, path, &new_key); fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_set_file_extent_num_bytes(leaf, fi, other_end - start); btrfs_set_file_extent_offset(leaf, fi, start - orig_offset); btrfs_mark_buffer_dirty(leaf); goto out; } } while (start > key.offset || end < extent_end) { if (key.offset == start) split = end; new_key.offset = split; ret = btrfs_duplicate_item(trans, root, path, &new_key); if (ret == -EAGAIN) { btrfs_release_path(path); goto again; } if (ret < 0) { btrfs_abort_transaction(trans, ret); goto out; } leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0] - 1, struct btrfs_file_extent_item); btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset); fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split); btrfs_mark_buffer_dirty(leaf); btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr, num_bytes, 0); btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset, 0, false); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } if (split == start) { key.offset = start; } else { if (start != key.offset) { ret = -EINVAL; btrfs_abort_transaction(trans, ret); goto out; } path->slots[0]--; extent_end = end; } recow = 1; } other_start = end; other_end = 0; btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, num_bytes, 0); btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset, 0, false); if (extent_mergeable(leaf, path->slots[0] + 1, ino, bytenr, orig_offset, &other_start, &other_end)) { if (recow) { btrfs_release_path(path); goto again; } extent_end = other_end; del_slot = path->slots[0] + 1; del_nr++; ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } } other_start = 0; other_end = start; if (extent_mergeable(leaf, path->slots[0] - 1, ino, bytenr, orig_offset, &other_start, &other_end)) { if (recow) { btrfs_release_path(path); goto again; } key.offset = other_start; del_slot = path->slots[0]; del_nr++; ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } } if (del_nr == 0) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_mark_buffer_dirty(leaf); } else { fi = btrfs_item_ptr(leaf, del_slot - 1, struct btrfs_file_extent_item); btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); btrfs_mark_buffer_dirty(leaf); ret = btrfs_del_items(trans, root, path, del_slot, del_nr); if (ret < 0) { btrfs_abort_transaction(trans, ret); goto out; } } out: btrfs_free_path(path); return ret; } /* * on error we return an unlocked page and the error value * on success we return a locked page and 0 */ static int prepare_uptodate_page(struct inode *inode, struct page *page, u64 pos, bool force_uptodate) { int ret = 0; if (((pos & (PAGE_SIZE - 1)) || force_uptodate) && !PageUptodate(page)) { ret = btrfs_readpage(NULL, page); if (ret) return ret; lock_page(page); if (!PageUptodate(page)) { unlock_page(page); return -EIO; } /* * Since btrfs_readpage() will unlock the page before it * returns, there is a window where btrfs_releasepage() can be * called to release the page. Here we check both inode * mapping and PagePrivate() to make sure the page was not * released. * * The private flag check is essential for subpage as we need * to store extra bitmap using page->private. */ if (page->mapping != inode->i_mapping || !PagePrivate(page)) { unlock_page(page); return -EAGAIN; } } return 0; } /* * this just gets pages into the page cache and locks them down. */ static noinline int prepare_pages(struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, bool force_uptodate) { int i; unsigned long index = pos >> PAGE_SHIFT; gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); int err = 0; int faili; for (i = 0; i < num_pages; i++) { again: pages[i] = find_or_create_page(inode->i_mapping, index + i, mask | __GFP_WRITE); if (!pages[i]) { faili = i - 1; err = -ENOMEM; goto fail; } err = set_page_extent_mapped(pages[i]); if (err < 0) { faili = i; goto fail; } if (i == 0) err = prepare_uptodate_page(inode, pages[i], pos, force_uptodate); if (!err && i == num_pages - 1) err = prepare_uptodate_page(inode, pages[i], pos + write_bytes, false); if (err) { put_page(pages[i]); if (err == -EAGAIN) { err = 0; goto again; } faili = i - 1; goto fail; } wait_on_page_writeback(pages[i]); } return 0; fail: while (faili >= 0) { unlock_page(pages[faili]); put_page(pages[faili]); faili--; } return err; } /* * This function locks the extent and properly waits for data=ordered extents * to finish before allowing the pages to be modified if need. * * The return value: * 1 - the extent is locked * 0 - the extent is not locked, and everything is OK * -EAGAIN - need re-prepare the pages * the other < 0 number - Something wrong happens */ static noinline int lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, u64 *lockstart, u64 *lockend, struct extent_state **cached_state) { struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 start_pos; u64 last_pos; int i; int ret = 0; start_pos = round_down(pos, fs_info->sectorsize); last_pos = round_up(pos + write_bytes, fs_info->sectorsize) - 1; if (start_pos < inode->vfs_inode.i_size) { struct btrfs_ordered_extent *ordered; lock_extent_bits(&inode->io_tree, start_pos, last_pos, cached_state); ordered = btrfs_lookup_ordered_range(inode, start_pos, last_pos - start_pos + 1); if (ordered && ordered->file_offset + ordered->num_bytes > start_pos && ordered->file_offset <= last_pos) { unlock_extent_cached(&inode->io_tree, start_pos, last_pos, cached_state); for (i = 0; i < num_pages; i++) { unlock_page(pages[i]); put_page(pages[i]); } btrfs_start_ordered_extent(ordered, 1); btrfs_put_ordered_extent(ordered); return -EAGAIN; } if (ordered) btrfs_put_ordered_extent(ordered); *lockstart = start_pos; *lockend = last_pos; ret = 1; } /* * We should be called after prepare_pages() which should have locked * all pages in the range. */ for (i = 0; i < num_pages; i++) WARN_ON(!PageLocked(pages[i])); return ret; } /* * Check if we can do nocow write into the range [@pos, @pos + @write_bytes) * * @pos: File offset. * @write_bytes: The length to write, will be updated to the nocow writeable * range. * * This function will flush ordered extents in the range to ensure proper * nocow checks. * * Return: * > 0 If we can nocow, and updates @write_bytes. * 0 If we can't do a nocow write. * -EAGAIN If we can't do a nocow write because snapshoting of the inode's * root is in progress. * < 0 If an error happened. * * NOTE: Callers need to call btrfs_check_nocow_unlock() if we return > 0. */ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, size_t *write_bytes) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; u64 lockstart, lockend; u64 num_bytes; int ret; if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) return 0; if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) return -EAGAIN; lockstart = round_down(pos, fs_info->sectorsize); lockend = round_up(pos + *write_bytes, fs_info->sectorsize) - 1; num_bytes = lockend - lockstart + 1; btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend, NULL); ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, NULL, NULL, NULL, false); if (ret <= 0) { ret = 0; btrfs_drew_write_unlock(&root->snapshot_lock); } else { *write_bytes = min_t(size_t, *write_bytes , num_bytes - pos + lockstart); } unlock_extent(&inode->io_tree, lockstart, lockend); return ret; } void btrfs_check_nocow_unlock(struct btrfs_inode *inode) { btrfs_drew_write_unlock(&inode->root->snapshot_lock); } static void update_time_for_write(struct inode *inode) { struct timespec64 now; if (IS_NOCMTIME(inode)) return; now = current_time(inode); if (!timespec64_equal(&inode->i_mtime, &now)) inode->i_mtime = now; if (!timespec64_equal(&inode->i_ctime, &now)) inode->i_ctime = now; if (IS_I_VERSION(inode)) inode_inc_iversion(inode); } static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); loff_t pos = iocb->ki_pos; int ret; loff_t oldsize; loff_t start_pos; /* * Quickly bail out on NOWAIT writes if we don't have the nodatacow or * prealloc flags, as without those flags we always have to COW. We will * later check if we can really COW into the target range (using * can_nocow_extent() at btrfs_get_blocks_direct_write()). */ if ((iocb->ki_flags & IOCB_NOWAIT) && !(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) return -EAGAIN; current->backing_dev_info = inode_to_bdi(inode); ret = file_remove_privs(file); if (ret) return ret; /* * We reserve space for updating the inode when we reserve space for the * extent we are going to write, so we will enospc out there. We don't * need to start yet another transaction to update the inode as we will * update the inode when we finish writing whatever data we write. */ update_time_for_write(inode); start_pos = round_down(pos, fs_info->sectorsize); oldsize = i_size_read(inode); if (start_pos > oldsize) { /* Expand hole size to cover write data, preventing empty gap */ loff_t end_pos = round_up(pos + count, fs_info->sectorsize); ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, end_pos); if (ret) { current->backing_dev_info = NULL; return ret; } } return 0; } static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i) { struct file *file = iocb->ki_filp; loff_t pos; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct page **pages = NULL; struct extent_changeset *data_reserved = NULL; u64 release_bytes = 0; u64 lockstart; u64 lockend; size_t num_written = 0; int nrptrs; ssize_t ret; bool only_release_metadata = false; bool force_page_uptodate = false; loff_t old_isize = i_size_read(inode); unsigned int ilock_flags = 0; if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; ret = btrfs_inode_lock(inode, ilock_flags); if (ret < 0) return ret; ret = generic_write_checks(iocb, i); if (ret <= 0) goto out; ret = btrfs_write_check(iocb, i, ret); if (ret < 0) goto out; pos = iocb->ki_pos; nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE), PAGE_SIZE / (sizeof(struct page *))); nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); nrptrs = max(nrptrs, 8); pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL); if (!pages) { ret = -ENOMEM; goto out; } while (iov_iter_count(i) > 0) { struct extent_state *cached_state = NULL; size_t offset = offset_in_page(pos); size_t sector_offset; size_t write_bytes = min(iov_iter_count(i), nrptrs * (size_t)PAGE_SIZE - offset); size_t num_pages; size_t reserve_bytes; size_t dirty_pages; size_t copied; size_t dirty_sectors; size_t num_sectors; int extents_locked; /* * Fault pages before locking them in prepare_pages * to avoid recursive lock */ if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) { ret = -EFAULT; break; } only_release_metadata = false; sector_offset = pos & (fs_info->sectorsize - 1); extent_changeset_release(data_reserved); ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, pos, write_bytes); if (ret < 0) { /* * If we don't have to COW at the offset, reserve * metadata only. write_bytes may get smaller than * requested here. */ if (btrfs_check_nocow_lock(BTRFS_I(inode), pos, &write_bytes) > 0) only_release_metadata = true; else break; } num_pages = DIV_ROUND_UP(write_bytes + offset, PAGE_SIZE); WARN_ON(num_pages > nrptrs); reserve_bytes = round_up(write_bytes + sector_offset, fs_info->sectorsize); WARN_ON(reserve_bytes == 0); ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), reserve_bytes, reserve_bytes, false); if (ret) { if (!only_release_metadata) btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, pos, write_bytes); else btrfs_check_nocow_unlock(BTRFS_I(inode)); break; } release_bytes = reserve_bytes; again: /* * This is going to setup the pages array with the number of * pages we want, so we don't really need to worry about the * contents of pages from loop to loop */ ret = prepare_pages(inode, pages, num_pages, pos, write_bytes, force_page_uptodate); if (ret) { btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); break; } extents_locked = lock_and_cleanup_extent_if_need( BTRFS_I(inode), pages, num_pages, pos, write_bytes, &lockstart, &lockend, &cached_state); if (extents_locked < 0) { if (extents_locked == -EAGAIN) goto again; btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); ret = extents_locked; break; } copied = btrfs_copy_from_user(pos, write_bytes, pages, i); num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes); dirty_sectors = round_up(copied + sector_offset, fs_info->sectorsize); dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors); /* * if we have trouble faulting in the pages, fall * back to one page at a time */ if (copied < write_bytes) nrptrs = 1; if (copied == 0) { force_page_uptodate = true; dirty_sectors = 0; dirty_pages = 0; } else { force_page_uptodate = false; dirty_pages = DIV_ROUND_UP(copied + offset, PAGE_SIZE); } if (num_sectors > dirty_sectors) { /* release everything except the sectors we dirtied */ release_bytes -= dirty_sectors << fs_info->sectorsize_bits; if (only_release_metadata) { btrfs_delalloc_release_metadata(BTRFS_I(inode), release_bytes, true); } else { u64 __pos; __pos = round_down(pos, fs_info->sectorsize) + (dirty_pages << PAGE_SHIFT); btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, __pos, release_bytes, true); } } release_bytes = round_up(copied + sector_offset, fs_info->sectorsize); ret = btrfs_dirty_pages(BTRFS_I(inode), pages, dirty_pages, pos, copied, &cached_state, only_release_metadata); /* * If we have not locked the extent range, because the range's * start offset is >= i_size, we might still have a non-NULL * cached extent state, acquired while marking the extent range * as delalloc through btrfs_dirty_pages(). Therefore free any * possible cached extent state to avoid a memory leak. */ if (extents_locked) unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); else free_extent_state(cached_state); btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); if (ret) { btrfs_drop_pages(fs_info, pages, num_pages, pos, copied); break; } release_bytes = 0; if (only_release_metadata) btrfs_check_nocow_unlock(BTRFS_I(inode)); btrfs_drop_pages(fs_info, pages, num_pages, pos, copied); cond_resched(); balance_dirty_pages_ratelimited(inode->i_mapping); pos += copied; num_written += copied; } kfree(pages); if (release_bytes) { if (only_release_metadata) { btrfs_check_nocow_unlock(BTRFS_I(inode)); btrfs_delalloc_release_metadata(BTRFS_I(inode), release_bytes, true); } else { btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, round_down(pos, fs_info->sectorsize), release_bytes, true); } } extent_changeset_free(data_reserved); if (num_written > 0) { pagecache_isize_extended(inode, old_isize, iocb->ki_pos); iocb->ki_pos += num_written; } out: btrfs_inode_unlock(inode, ilock_flags); return num_written ? num_written : ret; } static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, const struct iov_iter *iter, loff_t offset) { const u32 blocksize_mask = fs_info->sectorsize - 1; if (offset & blocksize_mask) return -EINVAL; if (iov_iter_alignment(iter) & blocksize_mask) return -EINVAL; return 0; } static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) { const bool is_sync_write = (iocb->ki_flags & IOCB_DSYNC); struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); loff_t pos; ssize_t written = 0; ssize_t written_buffered; size_t prev_left = 0; loff_t endbyte; ssize_t err; unsigned int ilock_flags = 0; if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; /* If the write DIO is within EOF, use a shared lock */ if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode)) ilock_flags |= BTRFS_ILOCK_SHARED; relock: err = btrfs_inode_lock(inode, ilock_flags); if (err < 0) return err; err = generic_write_checks(iocb, from); if (err <= 0) { btrfs_inode_unlock(inode, ilock_flags); return err; } err = btrfs_write_check(iocb, from, err); if (err < 0) { btrfs_inode_unlock(inode, ilock_flags); goto out; } pos = iocb->ki_pos; /* * Re-check since file size may have changed just before taking the * lock or pos may have changed because of O_APPEND in generic_write_check() */ if ((ilock_flags & BTRFS_ILOCK_SHARED) && pos + iov_iter_count(from) > i_size_read(inode)) { btrfs_inode_unlock(inode, ilock_flags); ilock_flags &= ~BTRFS_ILOCK_SHARED; goto relock; } if (check_direct_IO(fs_info, from, pos)) { btrfs_inode_unlock(inode, ilock_flags); goto buffered; } /* * We remove IOCB_DSYNC so that we don't deadlock when iomap_dio_rw() * calls generic_write_sync() (through iomap_dio_complete()), because * that results in calling fsync (btrfs_sync_file()) which will try to * lock the inode in exclusive/write mode. */ if (is_sync_write) iocb->ki_flags &= ~IOCB_DSYNC; /* * The iov_iter can be mapped to the same file range we are writing to. * If that's the case, then we will deadlock in the iomap code, because * it first calls our callback btrfs_dio_iomap_begin(), which will create * an ordered extent, and after that it will fault in the pages that the * iov_iter refers to. During the fault in we end up in the readahead * pages code (starting at btrfs_readahead()), which will lock the range, * find that ordered extent and then wait for it to complete (at * btrfs_lock_and_flush_ordered_range()), resulting in a deadlock since * obviously the ordered extent can never complete as we didn't submit * yet the respective bio(s). This always happens when the buffer is * memory mapped to the same file range, since the iomap DIO code always * invalidates pages in the target file range (after starting and waiting * for any writeback). * * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ again: from->nofault = true; err = btrfs_dio_rw(iocb, from, written); from->nofault = false; /* No increment (+=) because iomap returns a cumulative value. */ if (err > 0) written = err; if (iov_iter_count(from) > 0 && (err == -EFAULT || err > 0)) { const size_t left = iov_iter_count(from); /* * We have more data left to write. Try to fault in as many as * possible of the remainder pages and retry. We do this without * releasing and locking again the inode, to prevent races with * truncate. * * Also, in case the iov refers to pages in the file range of the * file we want to write to (due to a mmap), we could enter an * infinite loop if we retry after faulting the pages in, since * iomap will invalidate any pages in the range early on, before * it tries to fault in the pages of the iov. So we keep track of * how much was left of iov in the previous EFAULT and fallback * to buffered IO in case we haven't made any progress. */ if (left == prev_left) { err = -ENOTBLK; } else { fault_in_iov_iter_readable(from, left); prev_left = left; goto again; } } btrfs_inode_unlock(inode, ilock_flags); /* * Add back IOCB_DSYNC. Our caller, btrfs_file_write_iter(), will do * the fsync (call generic_write_sync()). */ if (is_sync_write) iocb->ki_flags |= IOCB_DSYNC; /* If 'err' is -ENOTBLK then it means we must fallback to buffered IO. */ if ((err < 0 && err != -ENOTBLK) || !iov_iter_count(from)) goto out; buffered: pos = iocb->ki_pos; written_buffered = btrfs_buffered_write(iocb, from); if (written_buffered < 0) { err = written_buffered; goto out; } /* * Ensure all data is persisted. We want the next direct IO read to be * able to read what was just written. */ endbyte = pos + written_buffered - 1; err = btrfs_fdatawrite_range(inode, pos, endbyte); if (err) goto out; err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte); if (err) goto out; written += written_buffered; iocb->ki_pos = pos + written_buffered; invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT, endbyte >> PAGE_SHIFT); out: return err < 0 ? err : written; } static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from, const struct btrfs_ioctl_encoded_io_args *encoded) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); loff_t count; ssize_t ret; btrfs_inode_lock(inode, 0); count = encoded->len; ret = generic_write_checks_count(iocb, &count); if (ret == 0 && count != encoded->len) { /* * The write got truncated by generic_write_checks_count(). We * can't do a partial encoded write. */ ret = -EFBIG; } if (ret || encoded->len == 0) goto out; ret = btrfs_write_check(iocb, from, encoded->len); if (ret < 0) goto out; ret = btrfs_do_encoded_write(iocb, from, encoded); out: btrfs_inode_unlock(inode, 0); return ret; } ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from, const struct btrfs_ioctl_encoded_io_args *encoded) { struct file *file = iocb->ki_filp; struct btrfs_inode *inode = BTRFS_I(file_inode(file)); ssize_t num_written, num_sync; const bool sync = iocb->ki_flags & IOCB_DSYNC; /* * If the fs flips readonly due to some impossible error, although we * have opened a file as writable, we have to stop this write operation * to ensure consistency. */ if (BTRFS_FS_ERROR(inode->root->fs_info)) return -EROFS; if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) return -EOPNOTSUPP; if (sync) atomic_inc(&inode->sync_writers); if (encoded) { num_written = btrfs_encoded_write(iocb, from, encoded); num_sync = encoded->len; } else if (iocb->ki_flags & IOCB_DIRECT) { num_written = num_sync = btrfs_direct_write(iocb, from); } else { num_written = num_sync = btrfs_buffered_write(iocb, from); } btrfs_set_inode_last_sub_trans(inode); if (num_sync > 0) { num_sync = generic_write_sync(iocb, num_sync); if (num_sync < 0) num_written = num_sync; } if (sync) atomic_dec(&inode->sync_writers); current->backing_dev_info = NULL; return num_written; } static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { return btrfs_do_write_iter(iocb, from, NULL); } int btrfs_release_file(struct inode *inode, struct file *filp) { struct btrfs_file_private *private = filp->private_data; if (private && private->filldir_buf) kfree(private->filldir_buf); kfree(private); filp->private_data = NULL; /* * Set by setattr when we are about to truncate a file from a non-zero * size to a zero size. This tries to flush down new bytes that may * have been written if the application were using truncate to replace * a file in place. */ if (test_and_clear_bit(BTRFS_INODE_FLUSH_ON_CLOSE, &BTRFS_I(inode)->runtime_flags)) filemap_flush(inode->i_mapping); return 0; } static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) { int ret; struct blk_plug plug; /* * This is only called in fsync, which would do synchronous writes, so * a plug can merge adjacent IOs as much as possible. Esp. in case of * multiple disks using raid profile, a large IO can be split to * several segments of stripe length (currently 64K). */ blk_start_plug(&plug); atomic_inc(&BTRFS_I(inode)->sync_writers); ret = btrfs_fdatawrite_range(inode, start, end); atomic_dec(&BTRFS_I(inode)->sync_writers); blk_finish_plug(&plug); return ret; } static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) { struct btrfs_inode *inode = BTRFS_I(ctx->inode); struct btrfs_fs_info *fs_info = inode->root->fs_info; if (btrfs_inode_in_log(inode, fs_info->generation) && list_empty(&ctx->ordered_extents)) return true; /* * If we are doing a fast fsync we can not bail out if the inode's * last_trans is <= then the last committed transaction, because we only * update the last_trans of the inode during ordered extent completion, * and for a fast fsync we don't wait for that, we only wait for the * writeback to complete. */ if (inode->last_trans <= fs_info->last_trans_committed && (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) || list_empty(&ctx->ordered_extents))) return true; return false; } /* * fsync call for both files and directories. This logs the inode into * the tree log instead of forcing full commits whenever possible. * * It needs to call filemap_fdatawait so that all ordered extent updates are * in the metadata btree are up to date for copying to the log. * * It drops the inode mutex before doing the tree log commit. This is an * important optimization for directories because holding the mutex prevents * new operations on the dir while we write to disk. */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file_dentry(file); struct inode *inode = d_inode(dentry); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; struct btrfs_log_ctx ctx; int ret = 0, err; u64 len; bool full_sync; trace_btrfs_sync_file(file, datasync); btrfs_init_log_ctx(&ctx, inode); /* * Always set the range to a full range, otherwise we can get into * several problems, from missing file extent items to represent holes * when not using the NO_HOLES feature, to log tree corruption due to * races between hole detection during logging and completion of ordered * extents outside the range, to missing checksums due to ordered extents * for which we flushed only a subset of their pages. */ start = 0; end = LLONG_MAX; len = (u64)LLONG_MAX + 1; /* * We write the dirty pages in the range and wait until they complete * out of the ->i_mutex. If so, we can flush the dirty pages by * multi-task, and make the performance up. See * btrfs_wait_ordered_range for an explanation of the ASYNC check. */ ret = start_ordered_ops(inode, start, end); if (ret) goto out; btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); atomic_inc(&root->log_batch); /* * Always check for the full sync flag while holding the inode's lock, * to avoid races with other tasks. The flag must be either set all the * time during logging or always off all the time while logging. */ full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); /* * Before we acquired the inode's lock and the mmap lock, someone may * have dirtied more pages in the target range. We need to make sure * that writeback for any such pages does not start while we are logging * the inode, because if it does, any of the following might happen when * we are not doing a full inode sync: * * 1) We log an extent after its writeback finishes but before its * checksums are added to the csum tree, leading to -EIO errors * when attempting to read the extent after a log replay. * * 2) We can end up logging an extent before its writeback finishes. * Therefore after the log replay we will have a file extent item * pointing to an unwritten extent (and no data checksums as well). * * So trigger writeback for any eventual new dirty pages and then we * wait for all ordered extents to complete below. */ ret = start_ordered_ops(inode, start, end); if (ret) { btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } /* * We have to do this here to avoid the priority inversion of waiting on * IO of a lower priority task while holding a transaction open. * * For a full fsync we wait for the ordered extents to complete while * for a fast fsync we wait just for writeback to complete, and then * attach the ordered extents to the transaction so that a transaction * commit waits for their completion, to avoid data loss if we fsync, * the current transaction commits before the ordered extents complete * and a power failure happens right after that. * * For zoned filesystem, if a write IO uses a ZONE_APPEND command, the * logical address recorded in the ordered extent may change. We need * to wait for the IO to stabilize the logical address. */ if (full_sync || btrfs_is_zoned(fs_info)) { ret = btrfs_wait_ordered_range(inode, start, len); } else { /* * Get our ordered extents as soon as possible to avoid doing * checksum lookups in the csum tree, and use instead the * checksums attached to the ordered extents. */ btrfs_get_ordered_extents_for_logging(BTRFS_I(inode), &ctx.ordered_extents); ret = filemap_fdatawait_range(inode->i_mapping, start, end); } if (ret) goto out_release_extents; atomic_inc(&root->log_batch); smp_mb(); if (skip_inode_logging(&ctx)) { /* * We've had everything committed since the last time we were * modified so clear this flag in case it was set for whatever * reason, it's no longer relevant. */ clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); /* * An ordered extent might have started before and completed * already with io errors, in which case the inode was not * updated and we end up here. So check the inode's mapping * for any errors that might have happened since we last * checked called fsync. */ ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err); goto out_release_extents; } /* * We use start here because we will need to wait on the IO to complete * in btrfs_sync_log, which could require joining a transaction (for * example checking cross references in the nocow path). If we use join * here we could get into a situation where we're waiting on IO to * happen that is blocked on a transaction trying to commit. With start * we inc the extwriter counter, so we wait for all extwriters to exit * before we start blocking joiners. This comment is to keep somebody * from thinking they are super smart and changing this to * btrfs_join_transaction *cough*Josef*cough*. */ trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_release_extents; } trans->in_fsync = true; ret = btrfs_log_dentry_safe(trans, dentry, &ctx); btrfs_release_log_ctx_extents(&ctx); if (ret < 0) { /* Fallthrough and commit/free transaction. */ ret = 1; } /* we've logged all the items and now have a consistent * version of the file in the log. It is possible that * someone will come in and modify the file, but that's * fine because the log is consistent on disk, and we * have references to all of the file's extents * * It is possible that someone will come in and log the * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); if (ret != BTRFS_NO_LOG_SYNC) { if (!ret) { ret = btrfs_sync_log(trans, root, &ctx); if (!ret) { ret = btrfs_end_transaction(trans); goto out; } } if (!full_sync) { ret = btrfs_wait_ordered_range(inode, start, len); if (ret) { btrfs_end_transaction(trans); goto out; } } ret = btrfs_commit_transaction(trans); } else { ret = btrfs_end_transaction(trans); } out: ASSERT(list_empty(&ctx.list)); err = file_check_and_advance_wb_err(file); if (!ret) ret = err; return ret > 0 ? -EIO : ret; out_release_extents: btrfs_release_log_ctx_extents(&ctx); btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } static const struct vm_operations_struct btrfs_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = btrfs_page_mkwrite, }; static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) { struct address_space *mapping = filp->f_mapping; if (!mapping->a_ops->readpage) return -ENOEXEC; file_accessed(filp); vma->vm_ops = &btrfs_file_vm_ops; return 0; } static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf, int slot, u64 start, u64 end) { struct btrfs_file_extent_item *fi; struct btrfs_key key; if (slot < 0 || slot >= btrfs_header_nritems(leaf)) return 0; btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) return 0; fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) return 0; if (btrfs_file_extent_disk_bytenr(leaf, fi)) return 0; if (key.offset == end) return 1; if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start) return 1; return 0; } static int fill_holes(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *path, u64 offset, u64 end) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; struct extent_map *hole_em; struct extent_map_tree *em_tree = &inode->extent_tree; struct btrfs_key key; int ret; if (btrfs_fs_incompat(fs_info, NO_HOLES)) goto out; key.objectid = btrfs_ino(inode); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = offset; ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret <= 0) { /* * We should have dropped this offset, so if we find it then * something has gone horribly wrong. */ if (ret == 0) ret = -EINVAL; return ret; } leaf = path->nodes[0]; if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) { u64 num_bytes; path->slots[0]--; fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - offset; btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_offset(leaf, fi, 0); btrfs_mark_buffer_dirty(leaf); goto out; } if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) { u64 num_bytes; key.offset = offset; btrfs_set_item_key_safe(fs_info, path, &key); fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - offset; btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_offset(leaf, fi, 0); btrfs_mark_buffer_dirty(leaf); goto out; } btrfs_release_path(path); ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0); if (ret) return ret; out: btrfs_release_path(path); hole_em = alloc_extent_map(); if (!hole_em) { btrfs_drop_extent_cache(inode, offset, end - 1, 0); btrfs_set_inode_full_sync(inode); } else { hole_em->start = offset; hole_em->len = end - offset; hole_em->ram_bytes = hole_em->len; hole_em->orig_start = offset; hole_em->block_start = EXTENT_MAP_HOLE; hole_em->block_len = 0; hole_em->orig_block_len = 0; hole_em->compress_type = BTRFS_COMPRESS_NONE; hole_em->generation = trans->transid; do { btrfs_drop_extent_cache(inode, offset, end - 1, 0); write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, hole_em, 1); write_unlock(&em_tree->lock); } while (ret == -EEXIST); free_extent_map(hole_em); if (ret) btrfs_set_inode_full_sync(inode); } return 0; } /* * Find a hole extent on given inode and change start/len to the end of hole * extent.(hole/vacuum extent whose em->start <= start && * em->start + em->len > start) * When a hole extent is found, return 1 and modify start/len. */ static int find_first_non_hole(struct btrfs_inode *inode, u64 *start, u64 *len) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct extent_map *em; int ret = 0; em = btrfs_get_extent(inode, NULL, 0, round_down(*start, fs_info->sectorsize), round_up(*len, fs_info->sectorsize)); if (IS_ERR(em)) return PTR_ERR(em); /* Hole or vacuum extent(only exists in no-hole mode) */ if (em->block_start == EXTENT_MAP_HOLE) { ret = 1; *len = em->start + em->len > *start + *len ? 0 : *start + *len - em->start - em->len; *start = em->start + em->len; } free_extent_map(em); return ret; } static void btrfs_punch_hole_lock_range(struct inode *inode, const u64 lockstart, const u64 lockend, struct extent_state **cached_state) { /* * For subpage case, if the range is not at page boundary, we could * have pages at the leading/tailing part of the range. * This could lead to dead loop since filemap_range_has_page() * will always return true. * So here we need to do extra page alignment for * filemap_range_has_page(). */ const u64 page_lockstart = round_up(lockstart, PAGE_SIZE); const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1; while (1) { truncate_pagecache_range(inode, lockstart, lockend); lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, cached_state); /* * We can't have ordered extents in the range, nor dirty/writeback * pages, because we have locked the inode's VFS lock in exclusive * mode, we have locked the inode's i_mmap_lock in exclusive mode, * we have flushed all delalloc in the range and we have waited * for any ordered extents in the range to complete. * We can race with anyone reading pages from this range, so after * locking the range check if we have pages in the range, and if * we do, unlock the range and retry. */ if (!filemap_range_has_page(inode->i_mapping, page_lockstart, page_lockend)) break; unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, cached_state); } btrfs_assert_inode_range_clean(BTRFS_I(inode), lockstart, lockend); } static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_replace_extent_info *extent_info, const u64 replace_len, const u64 bytes_to_drop) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = inode->root; struct btrfs_file_extent_item *extent; struct extent_buffer *leaf; struct btrfs_key key; int slot; struct btrfs_ref ref = { 0 }; int ret; if (replace_len == 0) return 0; if (extent_info->disk_offset == 0 && btrfs_fs_incompat(fs_info, NO_HOLES)) { btrfs_update_inode_bytes(inode, 0, bytes_to_drop); return 0; } key.objectid = btrfs_ino(inode); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = extent_info->file_offset; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(struct btrfs_file_extent_item)); if (ret) return ret; leaf = path->nodes[0]; slot = path->slots[0]; write_extent_buffer(leaf, extent_info->extent_buf, btrfs_item_ptr_offset(leaf, slot), sizeof(struct btrfs_file_extent_item)); extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); ASSERT(btrfs_file_extent_type(leaf, extent) != BTRFS_FILE_EXTENT_INLINE); btrfs_set_file_extent_offset(leaf, extent, extent_info->data_offset); btrfs_set_file_extent_num_bytes(leaf, extent, replace_len); if (extent_info->is_new_extent) btrfs_set_file_extent_generation(leaf, extent, trans->transid); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); ret = btrfs_inode_set_file_extent_range(inode, extent_info->file_offset, replace_len); if (ret) return ret; /* If it's a hole, nothing more needs to be done. */ if (extent_info->disk_offset == 0) { btrfs_update_inode_bytes(inode, 0, bytes_to_drop); return 0; } btrfs_update_inode_bytes(inode, replace_len, bytes_to_drop); if (extent_info->is_new_extent && extent_info->insertions == 0) { key.objectid = extent_info->disk_offset; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = extent_info->disk_len; ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode), extent_info->file_offset, extent_info->qgroup_reserved, &key); } else { u64 ref_offset; btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, extent_info->disk_offset, extent_info->disk_len, 0); ref_offset = extent_info->file_offset - extent_info->data_offset; btrfs_init_data_ref(&ref, root->root_key.objectid, btrfs_ino(inode), ref_offset, 0, false); ret = btrfs_inc_extent_ref(trans, &ref); } extent_info->insertions++; return ret; } /* * The respective range must have been previously locked, as well as the inode. * The end offset is inclusive (last byte of the range). * @extent_info is NULL for fallocate's hole punching and non-NULL when replacing * the file range with an extent. * When not punching a hole, we don't want to end up in a state where we dropped * extents without inserting a new one, so we must abort the transaction to avoid * a corruption. */ int btrfs_replace_file_extents(struct btrfs_inode *inode, struct btrfs_path *path, const u64 start, const u64 end, struct btrfs_replace_extent_info *extent_info, struct btrfs_trans_handle **trans_out) { struct btrfs_drop_extents_args drop_args = { 0 }; struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; u64 min_size = btrfs_calc_insert_metadata_size(fs_info, 1); u64 ino_size = round_up(inode->vfs_inode.i_size, fs_info->sectorsize); struct btrfs_trans_handle *trans = NULL; struct btrfs_block_rsv *rsv; unsigned int rsv_count; u64 cur_offset; u64 len = end - start; int ret = 0; if (end <= start) return -EINVAL; rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); if (!rsv) { ret = -ENOMEM; goto out; } rsv->size = btrfs_calc_insert_metadata_size(fs_info, 1); rsv->failfast = 1; /* * 1 - update the inode * 1 - removing the extents in the range * 1 - adding the hole extent if no_holes isn't set or if we are * replacing the range with a new extent */ if (!btrfs_fs_incompat(fs_info, NO_HOLES) || extent_info) rsv_count = 3; else rsv_count = 2; trans = btrfs_start_transaction(root, rsv_count); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; goto out_free; } ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, min_size, false); BUG_ON(ret); trans->block_rsv = rsv; cur_offset = start; drop_args.path = path; drop_args.end = end + 1; drop_args.drop_cache = true; while (cur_offset < end) { drop_args.start = cur_offset; ret = btrfs_drop_extents(trans, root, inode, &drop_args); /* If we are punching a hole decrement the inode's byte count */ if (!extent_info) btrfs_update_inode_bytes(inode, 0, drop_args.bytes_found); if (ret != -ENOSPC) { /* * The only time we don't want to abort is if we are * attempting to clone a partial inline extent, in which * case we'll get EOPNOTSUPP. However if we aren't * clone we need to abort no matter what, because if we * got EOPNOTSUPP via prealloc then we messed up and * need to abort. */ if (ret && (ret != -EOPNOTSUPP || (extent_info && extent_info->is_new_extent))) btrfs_abort_transaction(trans, ret); break; } trans->block_rsv = &fs_info->trans_block_rsv; if (!extent_info && cur_offset < drop_args.drop_end && cur_offset < ino_size) { ret = fill_holes(trans, inode, path, cur_offset, drop_args.drop_end); if (ret) { /* * If we failed then we didn't insert our hole * entries for the area we dropped, so now the * fs is corrupted, so we must abort the * transaction. */ btrfs_abort_transaction(trans, ret); break; } } else if (!extent_info && cur_offset < drop_args.drop_end) { /* * We are past the i_size here, but since we didn't * insert holes we need to clear the mapped area so we * know to not set disk_i_size in this area until a new * file extent is inserted here. */ ret = btrfs_inode_clear_file_extent_range(inode, cur_offset, drop_args.drop_end - cur_offset); if (ret) { /* * We couldn't clear our area, so we could * presumably adjust up and corrupt the fs, so * we need to abort. */ btrfs_abort_transaction(trans, ret); break; } } if (extent_info && drop_args.drop_end > extent_info->file_offset) { u64 replace_len = drop_args.drop_end - extent_info->file_offset; ret = btrfs_insert_replace_extent(trans, inode, path, extent_info, replace_len, drop_args.bytes_found); if (ret) { btrfs_abort_transaction(trans, ret); break; } extent_info->data_len -= replace_len; extent_info->data_offset += replace_len; extent_info->file_offset += replace_len; } ret = btrfs_update_inode(trans, root, inode); if (ret) break; btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); trans = btrfs_start_transaction(root, rsv_count); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; break; } ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, min_size, false); BUG_ON(ret); /* shouldn't happen */ trans->block_rsv = rsv; cur_offset = drop_args.drop_end; len = end - cur_offset; if (!extent_info && len) { ret = find_first_non_hole(inode, &cur_offset, &len); if (unlikely(ret < 0)) break; if (ret && !len) { ret = 0; break; } } } /* * If we were cloning, force the next fsync to be a full one since we * we replaced (or just dropped in the case of cloning holes when * NO_HOLES is enabled) file extent items and did not setup new extent * maps for the replacement extents (or holes). */ if (extent_info && !extent_info->is_new_extent) btrfs_set_inode_full_sync(inode); if (ret) goto out_trans; trans->block_rsv = &fs_info->trans_block_rsv; /* * If we are using the NO_HOLES feature we might have had already an * hole that overlaps a part of the region [lockstart, lockend] and * ends at (or beyond) lockend. Since we have no file extent items to * represent holes, drop_end can be less than lockend and so we must * make sure we have an extent map representing the existing hole (the * call to __btrfs_drop_extents() might have dropped the existing extent * map representing the existing hole), otherwise the fast fsync path * will not record the existence of the hole region * [existing_hole_start, lockend]. */ if (drop_args.drop_end <= end) drop_args.drop_end = end + 1; /* * Don't insert file hole extent item if it's for a range beyond eof * (because it's useless) or if it represents a 0 bytes range (when * cur_offset == drop_end). */ if (!extent_info && cur_offset < ino_size && cur_offset < drop_args.drop_end) { ret = fill_holes(trans, inode, path, cur_offset, drop_args.drop_end); if (ret) { /* Same comment as above. */ btrfs_abort_transaction(trans, ret); goto out_trans; } } else if (!extent_info && cur_offset < drop_args.drop_end) { /* See the comment in the loop above for the reasoning here. */ ret = btrfs_inode_clear_file_extent_range(inode, cur_offset, drop_args.drop_end - cur_offset); if (ret) { btrfs_abort_transaction(trans, ret); goto out_trans; } } if (extent_info) { ret = btrfs_insert_replace_extent(trans, inode, path, extent_info, extent_info->data_len, drop_args.bytes_found); if (ret) { btrfs_abort_transaction(trans, ret); goto out_trans; } } out_trans: if (!trans) goto out_free; trans->block_rsv = &fs_info->trans_block_rsv; if (ret) btrfs_end_transaction(trans); else *trans_out = trans; out_free: btrfs_free_block_rsv(fs_info, rsv); out: return ret; } static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_state *cached_state = NULL; struct btrfs_path *path; struct btrfs_trans_handle *trans = NULL; u64 lockstart; u64 lockend; u64 tail_start; u64 tail_len; u64 orig_start = offset; int ret = 0; bool same_block; u64 ino_size; bool truncated_block = false; bool updated_inode = false; btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); ret = btrfs_wait_ordered_range(inode, offset, len); if (ret) goto out_only_mutex; ino_size = round_up(inode->i_size, fs_info->sectorsize); ret = find_first_non_hole(BTRFS_I(inode), &offset, &len); if (ret < 0) goto out_only_mutex; if (ret && !len) { /* Already in a large hole */ ret = 0; goto out_only_mutex; } ret = file_modified(file); if (ret) goto out_only_mutex; lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode))); lockend = round_down(offset + len, btrfs_inode_sectorsize(BTRFS_I(inode))) - 1; same_block = (BTRFS_BYTES_TO_BLKS(fs_info, offset)) == (BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)); /* * We needn't truncate any block which is beyond the end of the file * because we are sure there is no data there. */ /* * Only do this if we are in the same block and we aren't doing the * entire block. */ if (same_block && len < fs_info->sectorsize) { if (offset < ino_size) { truncated_block = true; ret = btrfs_truncate_block(BTRFS_I(inode), offset, len, 0); } else { ret = 0; } goto out_only_mutex; } /* zero back part of the first block */ if (offset < ino_size) { truncated_block = true; ret = btrfs_truncate_block(BTRFS_I(inode), offset, 0, 0); if (ret) { btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); return ret; } } /* Check the aligned pages after the first unaligned page, * if offset != orig_start, which means the first unaligned page * including several following pages are already in holes, * the extra check can be skipped */ if (offset == orig_start) { /* after truncate page, check hole again */ len = offset + len - lockstart; offset = lockstart; ret = find_first_non_hole(BTRFS_I(inode), &offset, &len); if (ret < 0) goto out_only_mutex; if (ret && !len) { ret = 0; goto out_only_mutex; } lockstart = offset; } /* Check the tail unaligned part is in a hole */ tail_start = lockend + 1; tail_len = offset + len - tail_start; if (tail_len) { ret = find_first_non_hole(BTRFS_I(inode), &tail_start, &tail_len); if (unlikely(ret < 0)) goto out_only_mutex; if (!ret) { /* zero the front end of the last page */ if (tail_start + tail_len < ino_size) { truncated_block = true; ret = btrfs_truncate_block(BTRFS_I(inode), tail_start + tail_len, 0, 1); if (ret) goto out_only_mutex; } } } if (lockend < lockstart) { ret = 0; goto out_only_mutex; } btrfs_punch_hole_lock_range(inode, lockstart, lockend, &cached_state); path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } ret = btrfs_replace_file_extents(BTRFS_I(inode), path, lockstart, lockend, NULL, &trans); btrfs_free_path(path); if (ret) goto out; ASSERT(trans != NULL); inode_inc_iversion(inode); inode->i_mtime = inode->i_ctime = current_time(inode); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); updated_inode = true; btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); out_only_mutex: if (!updated_inode && truncated_block && !ret) { /* * If we only end up zeroing part of a page, we still need to * update the inode item, so that all the time fields are * updated as well as the necessary btrfs inode in memory fields * for detecting, at fsync time, if the inode isn't yet in the * log tree or it's there but not up to date. */ struct timespec64 now = current_time(inode); inode_inc_iversion(inode); inode->i_mtime = now; inode->i_ctime = now; trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); } else { int ret2; ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); ret2 = btrfs_end_transaction(trans); if (!ret) ret = ret2; } } btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); return ret; } /* Helper structure to record which range is already reserved */ struct falloc_range { struct list_head list; u64 start; u64 len; }; /* * Helper function to add falloc range * * Caller should have locked the larger range of extent containing * [start, len) */ static int add_falloc_range(struct list_head *head, u64 start, u64 len) { struct falloc_range *range = NULL; if (!list_empty(head)) { /* * As fallocate iterates by bytenr order, we only need to check * the last range. */ range = list_last_entry(head, struct falloc_range, list); if (range->start + range->len == start) { range->len += len; return 0; } } range = kmalloc(sizeof(*range), GFP_KERNEL); if (!range) return -ENOMEM; range->start = start; range->len = len; list_add_tail(&range->list, head); return 0; } static int btrfs_fallocate_update_isize(struct inode *inode, const u64 end, const int mode) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; int ret; int ret2; if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode)) return 0; trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) return PTR_ERR(trans); inode->i_ctime = current_time(inode); i_size_write(inode, end); btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); ret2 = btrfs_end_transaction(trans); return ret ? ret : ret2; } enum { RANGE_BOUNDARY_WRITTEN_EXTENT, RANGE_BOUNDARY_PREALLOC_EXTENT, RANGE_BOUNDARY_HOLE, }; static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode, u64 offset) { const u64 sectorsize = btrfs_inode_sectorsize(inode); struct extent_map *em; int ret; offset = round_down(offset, sectorsize); em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize); if (IS_ERR(em)) return PTR_ERR(em); if (em->block_start == EXTENT_MAP_HOLE) ret = RANGE_BOUNDARY_HOLE; else if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) ret = RANGE_BOUNDARY_PREALLOC_EXTENT; else ret = RANGE_BOUNDARY_WRITTEN_EXTENT; free_extent_map(em); return ret; } static int btrfs_zero_range(struct inode *inode, loff_t offset, loff_t len, const int mode) { struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct extent_map *em; struct extent_changeset *data_reserved = NULL; int ret; u64 alloc_hint = 0; const u64 sectorsize = btrfs_inode_sectorsize(BTRFS_I(inode)); u64 alloc_start = round_down(offset, sectorsize); u64 alloc_end = round_up(offset + len, sectorsize); u64 bytes_to_reserve = 0; bool space_reserved = false; em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start, alloc_end - alloc_start); if (IS_ERR(em)) { ret = PTR_ERR(em); goto out; } /* * Avoid hole punching and extent allocation for some cases. More cases * could be considered, but these are unlikely common and we keep things * as simple as possible for now. Also, intentionally, if the target * range contains one or more prealloc extents together with regular * extents and holes, we drop all the existing extents and allocate a * new prealloc extent, so that we get a larger contiguous disk extent. */ if (em->start <= alloc_start && test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { const u64 em_end = em->start + em->len; if (em_end >= offset + len) { /* * The whole range is already a prealloc extent, * do nothing except updating the inode's i_size if * needed. */ free_extent_map(em); ret = btrfs_fallocate_update_isize(inode, offset + len, mode); goto out; } /* * Part of the range is already a prealloc extent, so operate * only on the remaining part of the range. */ alloc_start = em_end; ASSERT(IS_ALIGNED(alloc_start, sectorsize)); len = offset + len - alloc_start; offset = alloc_start; alloc_hint = em->block_start + em->len; } free_extent_map(em); if (BTRFS_BYTES_TO_BLKS(fs_info, offset) == BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) { em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start, sectorsize); if (IS_ERR(em)) { ret = PTR_ERR(em); goto out; } if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { free_extent_map(em); ret = btrfs_fallocate_update_isize(inode, offset + len, mode); goto out; } if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) { free_extent_map(em); ret = btrfs_truncate_block(BTRFS_I(inode), offset, len, 0); if (!ret) ret = btrfs_fallocate_update_isize(inode, offset + len, mode); return ret; } free_extent_map(em); alloc_start = round_down(offset, sectorsize); alloc_end = alloc_start + sectorsize; goto reserve_space; } alloc_start = round_up(offset, sectorsize); alloc_end = round_down(offset + len, sectorsize); /* * For unaligned ranges, check the pages at the boundaries, they might * map to an extent, in which case we need to partially zero them, or * they might map to a hole, in which case we need our allocation range * to cover them. */ if (!IS_ALIGNED(offset, sectorsize)) { ret = btrfs_zero_range_check_range_boundary(BTRFS_I(inode), offset); if (ret < 0) goto out; if (ret == RANGE_BOUNDARY_HOLE) { alloc_start = round_down(offset, sectorsize); ret = 0; } else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) { ret = btrfs_truncate_block(BTRFS_I(inode), offset, 0, 0); if (ret) goto out; } else { ret = 0; } } if (!IS_ALIGNED(offset + len, sectorsize)) { ret = btrfs_zero_range_check_range_boundary(BTRFS_I(inode), offset + len); if (ret < 0) goto out; if (ret == RANGE_BOUNDARY_HOLE) { alloc_end = round_up(offset + len, sectorsize); ret = 0; } else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) { ret = btrfs_truncate_block(BTRFS_I(inode), offset + len, 0, 1); if (ret) goto out; } else { ret = 0; } } reserve_space: if (alloc_start < alloc_end) { struct extent_state *cached_state = NULL; const u64 lockstart = alloc_start; const u64 lockend = alloc_end - 1; bytes_to_reserve = alloc_end - alloc_start; ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), bytes_to_reserve); if (ret < 0) goto out; space_reserved = true; btrfs_punch_hole_lock_range(inode, lockstart, lockend, &cached_state); ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, alloc_start, bytes_to_reserve); if (ret) { unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); goto out; } ret = btrfs_prealloc_file_range(inode, mode, alloc_start, alloc_end - alloc_start, i_blocksize(inode), offset + len, &alloc_hint); unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); /* btrfs_prealloc_file_range releases reserved space on error */ if (ret) { space_reserved = false; goto out; } } ret = btrfs_fallocate_update_isize(inode, offset + len, mode); out: if (ret && space_reserved) btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, alloc_start, bytes_to_reserve); extent_changeset_free(data_reserved); return ret; } static long btrfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct extent_state *cached_state = NULL; struct extent_changeset *data_reserved = NULL; struct falloc_range *range; struct falloc_range *tmp; struct list_head reserve_list; u64 cur_offset; u64 last_byte; u64 alloc_start; u64 alloc_end; u64 alloc_hint = 0; u64 locked_end; u64 actual_end = 0; u64 data_space_needed = 0; u64 data_space_reserved = 0; u64 qgroup_reserved = 0; struct extent_map *em; int blocksize = btrfs_inode_sectorsize(BTRFS_I(inode)); int ret; /* Do not allow fallocate in ZONED mode */ if (btrfs_is_zoned(btrfs_sb(inode->i_sb))) return -EOPNOTSUPP; alloc_start = round_down(offset, blocksize); alloc_end = round_up(offset + len, blocksize); cur_offset = alloc_start; /* Make sure we aren't being give some crap mode */ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) return btrfs_punch_hole(file, offset, len); btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) { ret = inode_newsize_ok(inode, offset + len); if (ret) goto out; } ret = file_modified(file); if (ret) goto out; /* * TODO: Move these two operations after we have checked * accurate reserved space, or fallocate can still fail but * with page truncated or size expanded. * * But that's a minor problem and won't do much harm BTW. */ if (alloc_start > inode->i_size) { ret = btrfs_cont_expand(BTRFS_I(inode), i_size_read(inode), alloc_start); if (ret) goto out; } else if (offset + len > inode->i_size) { /* * If we are fallocating from the end of the file onward we * need to zero out the end of the block if i_size lands in the * middle of a block. */ ret = btrfs_truncate_block(BTRFS_I(inode), inode->i_size, 0, 0); if (ret) goto out; } /* * We have locked the inode at the VFS level (in exclusive mode) and we * have locked the i_mmap_lock lock (in exclusive mode). Now before * locking the file range, flush all dealloc in the range and wait for * all ordered extents in the range to complete. After this we can lock * the file range and, due to the previous locking we did, we know there * can't be more delalloc or ordered extents in the range. */ ret = btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); if (ret) goto out; if (mode & FALLOC_FL_ZERO_RANGE) { ret = btrfs_zero_range(inode, offset, len, mode); btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); return ret; } locked_end = alloc_end - 1; lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, &cached_state); btrfs_assert_inode_range_clean(BTRFS_I(inode), alloc_start, locked_end); /* First, check if we exceed the qgroup limit */ INIT_LIST_HEAD(&reserve_list); while (cur_offset < alloc_end) { em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, alloc_end - cur_offset); if (IS_ERR(em)) { ret = PTR_ERR(em); break; } last_byte = min(extent_map_end(em), alloc_end); actual_end = min_t(u64, extent_map_end(em), offset + len); last_byte = ALIGN(last_byte, blocksize); if (em->block_start == EXTENT_MAP_HOLE || (cur_offset >= inode->i_size && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { const u64 range_len = last_byte - cur_offset; ret = add_falloc_range(&reserve_list, cur_offset, range_len); if (ret < 0) { free_extent_map(em); break; } ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, cur_offset, range_len); if (ret < 0) { free_extent_map(em); break; } qgroup_reserved += range_len; data_space_needed += range_len; } free_extent_map(em); cur_offset = last_byte; } if (!ret && data_space_needed > 0) { /* * We are safe to reserve space here as we can't have delalloc * in the range, see above. */ ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), data_space_needed); if (!ret) data_space_reserved = data_space_needed; } /* * If ret is still 0, means we're OK to fallocate. * Or just cleanup the list and exit. */ list_for_each_entry_safe(range, tmp, &reserve_list, list) { if (!ret) { ret = btrfs_prealloc_file_range(inode, mode, range->start, range->len, i_blocksize(inode), offset + len, &alloc_hint); /* * btrfs_prealloc_file_range() releases space even * if it returns an error. */ data_space_reserved -= range->len; qgroup_reserved -= range->len; } else if (data_space_reserved > 0) { btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, range->start, range->len); data_space_reserved -= range->len; qgroup_reserved -= range->len; } else if (qgroup_reserved > 0) { btrfs_qgroup_free_data(BTRFS_I(inode), data_reserved, range->start, range->len); qgroup_reserved -= range->len; } list_del(&range->list); kfree(range); } if (ret < 0) goto out_unlock; /* * We didn't need to allocate any more space, but we still extended the * size of the file so we need to update i_size and the inode item. */ ret = btrfs_fallocate_update_isize(inode, actual_end, mode); out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, &cached_state); out: btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); extent_changeset_free(data_reserved); return ret; } static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset, int whence) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct extent_map *em = NULL; struct extent_state *cached_state = NULL; loff_t i_size = inode->vfs_inode.i_size; u64 lockstart; u64 lockend; u64 start; u64 len; int ret = 0; if (i_size == 0 || offset >= i_size) return -ENXIO; /* * offset can be negative, in this case we start finding DATA/HOLE from * the very start of the file. */ start = max_t(loff_t, 0, offset); lockstart = round_down(start, fs_info->sectorsize); lockend = round_up(i_size, fs_info->sectorsize); if (lockend <= lockstart) lockend = lockstart + fs_info->sectorsize; lockend--; len = lockend - lockstart + 1; lock_extent_bits(&inode->io_tree, lockstart, lockend, &cached_state); while (start < i_size) { em = btrfs_get_extent_fiemap(inode, start, len); if (IS_ERR(em)) { ret = PTR_ERR(em); em = NULL; break; } if (whence == SEEK_HOLE && (em->block_start == EXTENT_MAP_HOLE || test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) break; else if (whence == SEEK_DATA && (em->block_start != EXTENT_MAP_HOLE && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) break; start = em->start + em->len; free_extent_map(em); em = NULL; cond_resched(); } free_extent_map(em); unlock_extent_cached(&inode->io_tree, lockstart, lockend, &cached_state); if (ret) { offset = ret; } else { if (whence == SEEK_DATA && start >= i_size) offset = -ENXIO; else offset = min_t(loff_t, start, i_size); } return offset; } static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; switch (whence) { default: return generic_file_llseek(file, offset, whence); case SEEK_DATA: case SEEK_HOLE: btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); offset = find_desired_extent(BTRFS_I(inode), offset, whence); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); break; } if (offset < 0) return offset; return vfs_setpos(file, offset, inode->i_sb->s_maxbytes); } static int btrfs_file_open(struct inode *inode, struct file *filp) { int ret; filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; ret = fsverity_file_open(inode, filp); if (ret) return ret; return generic_file_open(inode, filp); } static int check_direct_read(struct btrfs_fs_info *fs_info, const struct iov_iter *iter, loff_t offset) { int ret; int i, seg; ret = check_direct_IO(fs_info, iter, offset); if (ret < 0) return ret; if (!iter_is_iovec(iter)) return 0; for (seg = 0; seg < iter->nr_segs; seg++) for (i = seg + 1; i < iter->nr_segs; i++) if (iter->iov[seg].iov_base == iter->iov[i].iov_base) return -EINVAL; return 0; } static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); size_t prev_left = 0; ssize_t read = 0; ssize_t ret; if (fsverity_active(inode)) return 0; if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos)) return 0; btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); again: /* * This is similar to what we do for direct IO writes, see the comment * at btrfs_direct_write(), but we also disable page faults in addition * to disabling them only at the iov_iter level. This is because when * reading from a hole or prealloc extent, iomap calls iov_iter_zero(), * which can still trigger page fault ins despite having set ->nofault * to true of our 'to' iov_iter. * * The difference to direct IO writes is that we deadlock when trying * to lock the extent range in the inode's tree during he page reads * triggered by the fault in (while for writes it is due to waiting for * our own ordered extent). This is because for direct IO reads, * btrfs_dio_iomap_begin() returns with the extent range locked, which * is only unlocked in the endio callback (end_bio_extent_readpage()). */ pagefault_disable(); to->nofault = true; ret = btrfs_dio_rw(iocb, to, read); to->nofault = false; pagefault_enable(); /* No increment (+=) because iomap returns a cumulative value. */ if (ret > 0) read = ret; if (iov_iter_count(to) > 0 && (ret == -EFAULT || ret > 0)) { const size_t left = iov_iter_count(to); if (left == prev_left) { /* * We didn't make any progress since the last attempt, * fallback to a buffered read for the remainder of the * range. This is just to avoid any possibility of looping * for too long. */ ret = read; } else { /* * We made some progress since the last retry or this is * the first time we are retrying. Fault in as many pages * as possible and retry. */ fault_in_iov_iter_writeable(to, left); prev_left = left; goto again; } } btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); return ret < 0 ? ret : read; } static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { ssize_t ret = 0; if (iocb->ki_flags & IOCB_DIRECT) { ret = btrfs_direct_read(iocb, to); if (ret < 0 || !iov_iter_count(to) || iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp))) return ret; } return filemap_read(iocb, to, ret); } const struct file_operations btrfs_file_operations = { .llseek = btrfs_file_llseek, .read_iter = btrfs_file_read_iter, .splice_read = generic_file_splice_read, .write_iter = btrfs_file_write_iter, .splice_write = iter_file_splice_write, .mmap = btrfs_file_mmap, .open = btrfs_file_open, .release = btrfs_release_file, .fsync = btrfs_sync_file, .fallocate = btrfs_fallocate, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_compat_ioctl, #endif .remap_file_range = btrfs_remap_file_range, }; void __cold btrfs_auto_defrag_exit(void) { kmem_cache_destroy(btrfs_inode_defrag_cachep); } int __init btrfs_auto_defrag_init(void) { btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", sizeof(struct inode_defrag), 0, SLAB_MEM_SPREAD, NULL); if (!btrfs_inode_defrag_cachep) return -ENOMEM; return 0; } int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end) { int ret; /* * So with compression we will find and lock a dirty page and clear the * first one as dirty, setup an async extent, and immediately return * with the entire range locked but with nobody actually marked with * writeback. So we can't just filemap_write_and_wait_range() and * expect it to work since it will just kick off a thread to do the * actual work. So we need to call filemap_fdatawrite_range _again_ * since it will wait on the page lock, which won't be unlocked until * after the pages have been marked as writeback and so we're good to go * from there. We have to do this otherwise we'll miss the ordered * extents and that results in badness. Please Josef, do not think you * know better and pull this out at some point in the future, it is * right and you are wrong. */ ret = filemap_fdatawrite_range(inode->i_mapping, start, end); if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags)) ret = filemap_fdatawrite_range(inode->i_mapping, start, end); return ret; }